mirror of
https://github.com/KevinMidboe/spotify-downloader.git
synced 2025-10-29 18:00:15 +00:00
Decouple fetching metadata
This commit is contained in:
@@ -1,412 +0,0 @@
|
|||||||
from bs4 import BeautifulSoup
|
|
||||||
import urllib
|
|
||||||
import pafy
|
|
||||||
|
|
||||||
from slugify import slugify
|
|
||||||
from logzero import logger as log
|
|
||||||
import os
|
|
||||||
|
|
||||||
from spotdl import spotify_tools
|
|
||||||
from spotdl import internals
|
|
||||||
from spotdl import const
|
|
||||||
|
|
||||||
# Fix download speed throttle on short duration tracks
# Read more on mps-youtube/pafy#199
pafy.g.opener.addheaders.append(("Range", "bytes=0-"))


def _version_tuple(version):
    """Parse a dotted version string into a tuple of ints for comparison.

    Non-numeric components (e.g. "dev" suffixes) are ignored.
    """
    return tuple(int(part) for part in version.split(".") if part.isdigit())


# Implement unreleased methods on Pafy object
# More info: https://github.com/mps-youtube/pafy/pull/211
# NOTE: compare version components numerically; a plain string comparison
# would wrongly treat e.g. "0.10.0" as older than "0.5.5".
if _version_tuple(pafy.__version__) <= (0, 5, 5):
    from spotdl import patcher

    pafy_patcher = patcher.PatchPafy()
    pafy_patcher.patch_getbestthumb()
    pafy_patcher.patch_process_streams()
    pafy_patcher.patch_insecure_streams()
|
|
||||||
|
|
||||||
|
|
||||||
def set_api_key():
    """Register a YouTube Data API key with pafy.

    Uses the key supplied on the command line when present, otherwise
    falls back to the bundled default key.
    """
    custom_key = const.args.youtube_api_key
    # Please respect this YouTube token :)
    default_key = "AIzaSyC6cEeKlxtOPybk9sEe5ksFN5sB-7wzYp0"
    pafy.set_api_key(custom_key if custom_key else default_key)
|
|
||||||
|
|
||||||
|
|
||||||
def go_pafy(raw_song, meta_tags=None):
    """Parse a track from YouTube and return a pafy video object.

    ``raw_song`` may already be a YouTube URL; otherwise a search is
    performed first.  Returns None when no matching video is found.
    """
    if internals.is_youtube(raw_song):
        # Already a YouTube URL — hand it straight to pafy.
        return pafy.new(raw_song)

    track_url = generate_youtube_url(raw_song, meta_tags)
    return pafy.new(track_url) if track_url else None
|
|
||||||
|
|
||||||
|
|
||||||
def match_video_and_metadata(track):
    """Get and match track data from YouTube and Spotify.

    Returns a ``(content, meta_tags)`` tuple where ``content`` is the
    matched pafy video (or None) and ``meta_tags`` is the metadata
    dict (or None).
    """
    meta_tags = None

    def fallback_metadata(meta_tags):
        # When Spotify knows nothing about the track, optionally fall
        # back on metadata scraped from the YouTube video itself.
        # NOTE: reads ``content`` from the enclosing scope; callers must
        # have assigned it before invoking this helper.
        if meta_tags is not None:
            return meta_tags
        if const.args.no_fallback_metadata:
            log.warning("Fallback condition not met, shall not embed metadata")
            return None
        log.info("Track not found on Spotify, falling back on YouTube metadata")
        return generate_metadata(content)

    if internals.is_youtube(track):
        log.debug("Input song is a YouTube URL")
        content = go_pafy(track, meta_tags=None)
        track = slugify(content.title).replace("-", " ")
        if not const.args.no_metadata:
            meta_tags = fallback_metadata(spotify_tools.generate_metadata(track))

    elif internals.is_spotify(track):
        log.debug("Input song is a Spotify URL")
        # Let it generate metadata, YouTube doesn't know Spotify slang
        meta_tags = spotify_tools.generate_metadata(track)
        content = go_pafy(track, meta_tags)
        if const.args.no_metadata:
            meta_tags = None

    else:
        log.debug("Input song is plain text based")
        if const.args.no_metadata:
            content = go_pafy(track, meta_tags=None)
        else:
            meta_tags = spotify_tools.generate_metadata(track)
            content = go_pafy(track, meta_tags=meta_tags)
            meta_tags = fallback_metadata(meta_tags)

    return content, meta_tags
|
|
||||||
|
|
||||||
|
|
||||||
def generate_metadata(content):
    """Build a Spotify-style metadata dict from a YouTube video.

    ``content`` is a pafy video object.  The returned dict mirrors the
    shape produced by the Spotify metadata fetcher, with
    ``spotify_metadata`` set to False so consumers can tell the source.
    """
    meta_tags = {
        "spotify_metadata": False,
        "name": content.title,
        "artists": [{"name": content.author}],
        "duration": content.length,
        "external_urls": {"youtube": content.watchv_url},
        "album": {
            "images": [{"url": content.getbestthumb()}],
            "artists": [{"name": None}],
            "name": None,
        },
        "year": None,
        "release_date": None,
        "type": "track",
        "disc_number": 1,
        "track_number": 1,
        "total_tracks": 1,
        "publisher": None,
        "external_ids": {"isrc": None},
        "lyrics": None,
        "copyright": None,
        "genre": None,
    }

    # Workaround for
    # https://github.com/ritiek/spotify-downloader/issues/671
    try:
        published = content.published
        meta_tags["year"] = published.split("-")[0]
        meta_tags["release_date"] = published.split(" ")[0]
    except pafy.util.GdataError:
        pass

    return meta_tags
|
|
||||||
|
|
||||||
|
|
||||||
def get_youtube_title(content, number=None):
    """Return the YouTube video's title.

    When ``number`` is truthy the title is prefixed as
    ``"<number>. <title>"`` (useful for enumerated listings).
    """
    if number:
        return "{0}. {1}".format(number, content.title)
    return content.title
|
|
||||||
|
|
||||||
|
|
||||||
def generate_m3u(track_file):
    """Generate an extended M3U playlist next to *track_file*.

    Each unique track in *track_file* is matched against YouTube; every
    matched video contributes one ``#EXTINF`` entry.  Returns the list
    of matched YouTube watch URLs.
    """
    tracks = internals.get_unique_tracks(track_file)
    # Use os.path.splitext so paths containing extra dots (e.g.
    # "./my.songs.txt") keep their full stem; the previous
    # track_file.split(".")[0] truncated at the first dot.
    target_file = "{}.m3u".format(os.path.splitext(track_file)[0])
    total_tracks = len(tracks)
    log.info("Generating {0} from {1} YouTube URLs".format(target_file, total_tracks))
    with open(target_file, "w") as output_file:
        output_file.write("#EXTM3U\n\n")

    videos = []
    for n, track in enumerate(tracks, 1):
        content, _ = match_video_and_metadata(track)
        if content is None:
            log.warning("Skipping {}".format(track))
            continue

        log.info(
            "Matched track {0}/{1} ({2})".format(
                n, total_tracks, content.watchv_url
            )
        )
        log.debug(track)
        m3u_key = "#EXTINF:{duration},{title}\n{youtube_url}\n".format(
            duration=internals.get_sec(content.duration),
            title=content.title,
            youtube_url=content.watchv_url,
        )
        log.debug(m3u_key)
        # Append per track so a partially generated playlist survives
        # an interruption.
        with open(target_file, "a") as output_file:
            output_file.write(m3u_key)
        videos.append(content.watchv_url)

    return videos
|
|
||||||
|
|
||||||
|
|
||||||
def download_song(file_name, content):
    """Download the audio stream of a YouTube video to *file_name*.

    Only ``.webm`` and ``.m4a`` targets are supported.  Returns True on
    success, False when no suitable audio stream is available.
    """
    _, extension = os.path.splitext(file_name)
    if extension not in (".webm", ".m4a"):
        log.debug("No audio streams available for {} type".format(extension))
        return False

    # Strip the leading dot; pafy expects "webm"/"m4a".
    link = content.getbestaudio(preftype=extension[1:])
    if not link:
        log.debug("No audio streams available")
        return False

    log.debug("Downloading from URL: " + link.url)
    filepath = os.path.join(const.args.folder, file_name)
    log.debug("Saving to: " + filepath)
    link.download(filepath=filepath)
    return True
|
|
||||||
|
|
||||||
|
|
||||||
def generate_search_url(query):
    """Return the YouTube search-results URL for *query* (videos only)."""
    # urllib.request.quote() percent-encodes special characters; the
    # "sp" parameter is YouTube's filter restricting results to videos.
    return "https://www.youtube.com/results?sp=EgIQAQ%253D%253D&q={0}".format(
        urllib.request.quote(query)
    )
|
|
||||||
|
|
||||||
|
|
||||||
def is_video(result):
    """Return True when a YouTube search result is a plain video.

    Filters out channels, mixes/playlists and advertisements.  The
    checks run in the same order as the original boolean chain so
    short-circuit behavior on the parsed markup is identical.
    """
    # ensure result is not a channel
    if result.find("channel") is not None:
        return False
    if "yt-lockup-channel" in result.parent.attrs["class"]:
        return False
    if "yt-lockup-channel" in result.attrs["class"]:
        return False
    # ensure result is not a mix/playlist
    if "yt-lockup-playlist" in result.parent.attrs["class"]:
        return False
    # ensure video result is not an advertisement
    if result.find("googleads") is not None:
        return False
    return True
|
|
||||||
|
|
||||||
|
|
||||||
def generate_youtube_url(raw_song, meta_tags):
    """Return the best-matching YouTube URL for a song.

    Uses the YouTube Data API when an API key was supplied on the
    command line, otherwise scrapes the search results page.
    """
    fetcher = GenerateYouTubeURL(raw_song, meta_tags)
    return fetcher.api() if const.args.youtube_api_key else fetcher.scrape()
|
|
||||||
|
|
||||||
|
|
||||||
class GenerateYouTubeURL:
    """Find the YouTube watch URL that best matches a song.

    The search query is the raw song string when no Spotify metadata is
    available, otherwise the configured search format filled in with
    the metadata.
    """

    def __init__(self, raw_song, meta_tags):
        self.raw_song = raw_song
        self.meta_tags = meta_tags
        if meta_tags is None:
            self.search_query = raw_song
        else:
            self.search_query = internals.format_string(
                const.args.search_format, meta_tags, force_spaces=True
            )

    def _best_match(self, videos):
        """Select the best matching video from a list of videos."""
        if not videos:
            log.error("No videos found on YouTube for a given search")
            return None

        if const.args.manual:
            log.info(self.raw_song)
            log.info("0. Skip downloading this song.\n")
            # fetch all video links on first page on YouTube
            for position, video in enumerate(videos):
                log.info(
                    u"{0}. {1} {2} {3}".format(
                        position + 1,
                        video["title"],
                        video["videotime"],
                        "http://youtube.com/watch?v=" + video["link"],
                    )
                )
            # let user select the song to download
            result = internals.input_link(videos)
            if result is None:
                return None
        elif not self.meta_tags:
            # if the metadata could not be acquired, take the first result
            # from Youtube because the proper song length is unknown
            result = videos[0]
            log.debug(
                "Since no metadata found on Spotify, going with the first result"
            )
        else:
            # filter out videos that do not have a similar length to the
            # Spotify song: start with a reasonable tolerance and widen it
            # until a result falls within the allowed duration difference
            # or the maximum tolerance is exceeded
            tolerance = 10
            max_tolerance = 20
            candidates = []
            while not candidates:
                candidates = [
                    video
                    for video in videos
                    if abs(video["seconds"] - self.meta_tags["duration"])
                    <= tolerance
                ]
                tolerance += 1
                if tolerance > max_tolerance:
                    log.error(
                        "{0} by {1} was not found.".format(
                            self.meta_tags["name"],
                            self.meta_tags["artists"][0]["name"],
                        )
                    )
                    return None
            result = candidates[0]

        if result:
            return "http://youtube.com/watch?v={0}".format(result["link"])
        return None

    def scrape(self, bestmatch=True, tries_remaining=5):
        """Search and scrape YouTube to return a list of matching videos."""
        # prevents an infinite loop but allows for a few retries
        if tries_remaining == 0:
            log.debug("No tries left. I quit.")
            return

        search_url = generate_search_url(self.search_query)
        log.debug("Opening URL: {0}".format(search_url))

        raw_page = self._fetch_response(search_url).read()
        parsed_page = BeautifulSoup(raw_page, "html.parser")

        videos = []
        results = parsed_page.find_all(
            "div", {"class": "yt-lockup-dismissable yt-uix-tile"}
        )
        for result in results:
            if not is_video(result):
                continue

            details = result.find("div", class_="yt-lockup-content")
            # The last 11 characters of the href are the video id.
            video_id = details.find("a")["href"][-11:]
            video_title = details.find("a")["title"]

            try:
                videotime = result.find("span", class_="video-time").get_text()
            except AttributeError:
                log.debug("Could not find video duration on YouTube, retrying..")
                return self.scrape(
                    bestmatch=bestmatch, tries_remaining=tries_remaining - 1
                )

            videos.append(
                {
                    "link": video_id,
                    "title": video_title,
                    "videotime": videotime,
                    "seconds": internals.get_sec(videotime),
                }
            )

        if bestmatch:
            return self._best_match(videos)
        return videos

    def api(self, bestmatch=True):
        """Use YouTube API to search and return a list of matching videos."""
        query = {"part": "snippet", "maxResults": 50, "type": "video"}
        if const.args.music_videos_only:
            query["videoCategoryId"] = "10"

        query["q"] = self.search_query if self.meta_tags else self.raw_song
        log.debug("query: {0}".format(query))

        data = pafy.call_gdata("search", query)
        # Keep only results that carry a videoId (i.e. actual videos).
        data["items"] = [
            item for item in data["items"] if item["id"].get("videoId") is not None
        ]
        query_results = {
            "part": "contentDetails,snippet,statistics",
            "maxResults": 50,
            "id": ",".join(item["id"]["videoId"] for item in data["items"]),
        }
        log.debug("query_results: {0}".format(query_results))

        vdata = pafy.call_gdata("videos", query_results)

        videos = []
        for item in vdata["items"]:
            duration_s = pafy.playlist.parseISO8591(
                item["contentDetails"]["duration"]
            )
            videos.append(
                {
                    "link": item["id"],
                    "title": item["snippet"]["title"],
                    "videotime": internals.videotime_from_seconds(duration_s),
                    "seconds": duration_s,
                }
            )

        if bestmatch:
            return self._best_match(videos)
        return videos

    @staticmethod
    def _fetch_response(url):
        # XXX: This method exists only because it helps us indirectly
        # monkey patch `urllib.request.open`; monkey patching it directly
        # would cause infinite recursion during tests, since the patched
        # `urllib.request.open` would monkeypatch itself.
        return urllib.request.urlopen(url)
|
|
||||||
@@ -26,8 +26,18 @@ from spotdl.encode.exceptions import EncoderNotFoundError
|
|||||||
|
|
||||||
|
|
||||||
class EncoderBase(ABC):
|
class EncoderBase(ABC):
|
||||||
|
"""
|
||||||
|
Defined encoders must inherit from this abstract base class
|
||||||
|
and implement their own functionality for the below defined
|
||||||
|
methods.
|
||||||
|
"""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def __init__(self, encoder_path, loglevel, additional_arguments):
|
def __init__(self, encoder_path, loglevel, additional_arguments=[]):
|
||||||
|
"""
|
||||||
|
This method must make sure whether specified encoder
|
||||||
|
is available under PATH.
|
||||||
|
"""
|
||||||
if shutil.which(encoder_path) is None:
|
if shutil.which(encoder_path) is None:
|
||||||
raise EncoderNotFoundError(
|
raise EncoderNotFoundError(
|
||||||
"{} executable does not exist or was not found in PATH.".format(
|
"{} executable does not exist or was not found in PATH.".format(
|
||||||
@@ -40,26 +50,51 @@ class EncoderBase(ABC):
|
|||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def set_argument(self, argument):
|
def set_argument(self, argument):
|
||||||
|
"""
|
||||||
|
This method must be used to set any custom functionality
|
||||||
|
for the encoder by passing arguments to it.
|
||||||
|
"""
|
||||||
self._additional_arguments += argument.split()
|
self._additional_arguments += argument.split()
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def get_encoding(self, filename):
|
def get_encoding(self, filename):
|
||||||
|
"""
|
||||||
|
This method must determine the encoding for a local
|
||||||
|
audio file. Such as "mp3", "wav", "m4a", etc.
|
||||||
|
"""
|
||||||
_, extension = os.path.splitext(filename)
|
_, extension = os.path.splitext(filename)
|
||||||
# Ignore the initial dot from file extension
|
# Ignore the initial dot from file extension
|
||||||
return extension[1:]
|
return extension[1:]
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def set_debuglog(self):
|
def set_debuglog(self):
|
||||||
|
"""
|
||||||
|
This method must enable verbose logging in the defined
|
||||||
|
encoder.
|
||||||
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def _generate_encode_command(self, input_file, output_file):
|
def _generate_encode_command(self, input_file, output_file):
|
||||||
|
"""
|
||||||
|
This method must the complete command for that would be
|
||||||
|
used to invoke the encoder and perform the encoding.
|
||||||
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def _generate_encoding_arguments(self, input_encoding, output_encoding):
|
def _generate_encoding_arguments(self, input_encoding, output_encoding):
|
||||||
|
"""
|
||||||
|
This method must return the core arguments for the defined
|
||||||
|
encoder such as defining the sample rate, audio bitrate,
|
||||||
|
etc.
|
||||||
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def re_encode(self, input_encoding, output_encoding):
|
def re_encode(self, input_file, output_file):
|
||||||
|
"""
|
||||||
|
This method must invoke FFmpeg to encode a given input
|
||||||
|
file to a specified output file.
|
||||||
|
"""
|
||||||
pass
|
pass
|
||||||
|
|||||||
@@ -60,9 +60,12 @@ class EncoderFFmpeg(EncoderBase):
|
|||||||
def set_debuglog(self):
|
def set_debuglog(self):
|
||||||
self._loglevel = "-loglevel debug"
|
self._loglevel = "-loglevel debug"
|
||||||
|
|
||||||
def _generate_encode_command(self, input_file, output_file):
|
def _generate_encode_command(self, input_file, output_file,
|
||||||
input_encoding = self.get_encoding(input_file)
|
input_encoding=None, output_encoding=None):
|
||||||
output_encoding = self.get_encoding(output_file)
|
if input_encoding is None:
|
||||||
|
input_encoding = self.get_encoding(input_file)
|
||||||
|
if output_encoding is None:
|
||||||
|
output_encoding = self.get_encoding(output_file)
|
||||||
arguments = self._generate_encoding_arguments(
|
arguments = self._generate_encoding_arguments(
|
||||||
input_encoding,
|
input_encoding,
|
||||||
output_encoding
|
output_encoding
|
||||||
@@ -82,9 +85,20 @@ class EncoderFFmpeg(EncoderBase):
|
|||||||
input_file,
|
input_file,
|
||||||
output_file
|
output_file
|
||||||
)
|
)
|
||||||
returncode = subprocess.call(encode_command)
|
process = subprocess.Popen(encode_command)
|
||||||
encode_successful = returncode == 0
|
process.wait()
|
||||||
|
encode_successful = process.returncode == 0
|
||||||
if encode_successful and delete_original:
|
if encode_successful and delete_original:
|
||||||
os.remove(input_file)
|
os.remove(input_file)
|
||||||
|
return process
|
||||||
|
|
||||||
|
def re_encode_from_stdin(self, input_encoding, output_file):
|
||||||
|
output_encoding = self.get_encoding(output_file)
|
||||||
|
encode_command = self._generate_encode_command(
|
||||||
|
"-",
|
||||||
|
output_file,
|
||||||
|
input_encoding=input_encoding,
|
||||||
|
)
|
||||||
|
process = subprocess.Popen(encode_command)
|
||||||
|
return process
|
||||||
|
|
||||||
return returncode
|
|
||||||
|
|||||||
@@ -5,10 +5,25 @@ from abc import abstractmethod
|
|||||||
|
|
||||||
|
|
||||||
class LyricBase(ABC):
|
class LyricBase(ABC):
|
||||||
|
"""
|
||||||
|
Defined lyric providers must inherit from this abstract base
|
||||||
|
class and implement their own functionality for the below
|
||||||
|
defined methods.
|
||||||
|
"""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def __init__(self, artist, track):
|
def __init__(self, artist, track):
|
||||||
|
"""
|
||||||
|
This method must set any protected attributes,
|
||||||
|
which may be modified from outside the class
|
||||||
|
if the need arises.
|
||||||
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def get_lyrics(self, linesep="\n", timeout=None):
|
def get_lyrics(self, linesep="\n", timeout=None):
|
||||||
|
"""
|
||||||
|
This method must return the lyrics string for the
|
||||||
|
given track.
|
||||||
|
"""
|
||||||
pass
|
pass
|
||||||
|
|||||||
@@ -14,6 +14,11 @@ class Genius(LyricBase):
|
|||||||
self.base_url = BASE_URL
|
self.base_url = BASE_URL
|
||||||
|
|
||||||
def _guess_lyric_url(self):
|
def _guess_lyric_url(self):
|
||||||
|
"""
|
||||||
|
Returns the possible lyric URL for the track available
|
||||||
|
on Genius. This may not always be a valid URL, but this
|
||||||
|
is apparently the best we can do at the moment?
|
||||||
|
"""
|
||||||
query = "/{} {} lyrics".format(self.artist, self.track)
|
query = "/{} {} lyrics".format(self.artist, self.track)
|
||||||
query = query.replace(" ", "-")
|
query = query.replace(" ", "-")
|
||||||
encoded_query = urllib.request.quote(query)
|
encoded_query = urllib.request.quote(query)
|
||||||
@@ -21,6 +26,10 @@ class Genius(LyricBase):
|
|||||||
return lyric_url
|
return lyric_url
|
||||||
|
|
||||||
def _fetch_page(self, url, timeout=None):
|
def _fetch_page(self, url, timeout=None):
|
||||||
|
"""
|
||||||
|
Makes a GET request to the given URL and returns the
|
||||||
|
HTML content in the case of a valid response.
|
||||||
|
"""
|
||||||
request = urllib.request.Request(url)
|
request = urllib.request.Request(url)
|
||||||
request.add_header("User-Agent", "urllib")
|
request.add_header("User-Agent", "urllib")
|
||||||
try:
|
try:
|
||||||
@@ -35,14 +44,23 @@ class Genius(LyricBase):
|
|||||||
return response.read()
|
return response.read()
|
||||||
|
|
||||||
def _get_lyrics_text(self, html):
|
def _get_lyrics_text(self, html):
|
||||||
|
"""
|
||||||
|
Extracts and returns the lyric content from the
|
||||||
|
provided HTML.
|
||||||
|
"""
|
||||||
soup = BeautifulSoup(html, "html.parser")
|
soup = BeautifulSoup(html, "html.parser")
|
||||||
lyrics_paragraph = soup.find("p")
|
lyrics_paragraph = soup.find("p")
|
||||||
if lyrics_paragraph:
|
if lyrics_paragraph:
|
||||||
return lyrics_paragraph.get_text()
|
return lyrics_paragraph.get_text()
|
||||||
else:
|
else:
|
||||||
raise LyricsNotFoundError("The lyrics for this track are yet to be released.")
|
raise LyricsNotFoundError(
|
||||||
|
"The lyrics for this track are yet to be released."
|
||||||
|
)
|
||||||
|
|
||||||
def get_lyrics(self, linesep="\n", timeout=None):
|
def get_lyrics(self, linesep="\n", timeout=None):
|
||||||
|
"""
|
||||||
|
Returns the lyric string for the given artist and track.
|
||||||
|
"""
|
||||||
url = self._guess_lyric_url()
|
url = self._guess_lyric_url()
|
||||||
html_page = self._fetch_page(url, timeout=timeout)
|
html_page = self._fetch_page(url, timeout=timeout)
|
||||||
lyrics = self._get_lyrics_text(html_page)
|
lyrics = self._get_lyrics_text(html_page)
|
||||||
|
|||||||
@@ -10,6 +10,9 @@ class LyricWikia(LyricBase):
|
|||||||
self.track = track
|
self.track = track
|
||||||
|
|
||||||
def get_lyrics(self, linesep="\n", timeout=None):
|
def get_lyrics(self, linesep="\n", timeout=None):
|
||||||
|
"""
|
||||||
|
Returns the lyric string for the given artist and track.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
lyrics = lyricwikia.get_lyrics(self.artist, self.track, linesep, timeout)
|
lyrics = lyricwikia.get_lyrics(self.artist, self.track, linesep, timeout)
|
||||||
except lyricwikia.LyricsNotFound as e:
|
except lyricwikia.LyricsNotFound as e:
|
||||||
|
|||||||
2
spotdl/metadata/__init__.py
Normal file
2
spotdl/metadata/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
from spotdl.metadata.metadata_base import MetadataBase
|
||||||
|
from spotdl.metadata.metadata_base import StreamsBase
|
||||||
69
spotdl/metadata/metadata_base.py
Normal file
69
spotdl/metadata/metadata_base.py
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
from abc import ABC
|
||||||
|
from abc import abstractmethod
|
||||||
|
|
||||||
|
|
||||||
|
class StreamsBase(ABC):
    """Abstract base over a provider's parsed audio streams.

    Subclasses must parse the provider's audio streams into a list of
    dicts with the keys "bitrate", "download_url", "encoding" and
    "filesize", typically sorted by descending bitrate, and assign the
    list to ``self.all``.
    """

    @abstractmethod
    def __init__(self, streams):
        # Default behavior: store the streams unchanged.
        self.all = streams

    @abstractmethod
    def getbest(self):
        """Return the audio stream with the highest bitrate."""
        return self.all[0]

    @abstractmethod
    def getworst(self):
        """Return the audio stream with the lowest bitrate."""
        return self.all[-1]
|
||||||
|
|
||||||
|
|
||||||
|
class MetadataBase(ABC):
    """Abstract base class for track-metadata providers.

    Concrete providers implement fetching metadata by URL or by search
    query and normalizing it into a form shared by all providers.
    """

    def set_credentials(self, client_id, client_secret):
        """Authenticate with the provider; no-op when not required."""
        pass

    @abstractmethod
    def from_url(self, url):
        """Return track metadata for the given provider URL."""
        pass

    @abstractmethod
    def from_query(self, query):
        """Return track metadata for the given search query."""
        pass

    @abstractmethod
    def metadata_to_standard_form(self, metadata):
        """Transform provider metadata into the shared standard form."""
        pass
|
||||||
2
spotdl/metadata/providers/__init__.py
Normal file
2
spotdl/metadata/providers/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
from spotdl.metadata.providers.spotify import MetadataSpotify
|
||||||
|
from spotdl.metadata.providers.youtube import MetadataYouTube
|
||||||
66
spotdl/metadata/providers/spotify.py
Normal file
66
spotdl/metadata/providers/spotify.py
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
import spotipy
|
||||||
|
import spotipy.oauth2 as oauth2
|
||||||
|
|
||||||
|
from spotdl.metadata import MetadataBase
|
||||||
|
|
||||||
|
|
||||||
|
class MetadataSpotify(MetadataBase):
    """Fetch track metadata from Spotify and normalize it."""

    def __init__(self, spotify=None):
        # ``spotify`` is an authenticated spotipy.Spotify client; it can
        # also be attached later via set_credentials().
        self.spotify = spotify

    def set_credentials(self, client_id, client_secret):
        """Authenticate against Spotify with client credentials."""
        token = self._generate_token(client_id, client_secret)
        self.spotify = spotipy.Spotify(auth=token)

    def from_url(self, url):
        """Return standardized metadata for a Spotify track URL."""
        return self.metadata_to_standard_form(self.spotify.track(url))

    def from_query(self, query):
        """Return standardized metadata for the top search result."""
        top_hit = self.spotify.search(query, limit=1)["tracks"]["items"][0]
        return self.metadata_to_standard_form(top_hit)

    def _generate_token(self, client_id, client_secret):
        """ Generate the token. """
        credentials = oauth2.SpotifyClientCredentials(
            client_id=client_id,
            client_secret=client_secret,
        )
        return credentials.get_access_token()

    def _titlecase(self, string):
        # Capitalize each whitespace-separated word.
        return " ".join(word.capitalize() for word in string.split())

    def metadata_to_standard_form(self, metadata):
        """Enrich raw Spotify track metadata into the common form.

        Pulls in artist genre and album copyright/label/track-count,
        adds derived fields and strips bulky unused keys.
        """
        artist = self.spotify.artist(metadata["artists"][0]["id"])
        album = self.spotify.album(metadata["album"]["id"])

        try:
            metadata[u"genre"] = self._titlecase(artist["genres"][0])
        except IndexError:
            metadata[u"genre"] = None
        try:
            metadata[u"copyright"] = album["copyrights"][0]["text"]
        except IndexError:
            metadata[u"copyright"] = None
        try:
            metadata[u"external_ids"][u"isrc"]
        except KeyError:
            metadata[u"external_ids"][u"isrc"] = None

        metadata[u"release_date"] = album["release_date"]
        metadata[u"publisher"] = album["label"]
        metadata[u"total_tracks"] = album["tracks"]["total"]

        # Some sugar
        metadata["year"], *_ = metadata["release_date"].split("-")
        metadata["duration"] = metadata["duration_ms"] / 1000.0
        metadata["provider"] = "spotify"

        # Remove unwanted parameters
        del metadata["duration_ms"]
        del metadata["available_markets"]
        del metadata["album"]["available_markets"]

        return metadata
|
||||||
140
spotdl/metadata/providers/youtube.py
Normal file
140
spotdl/metadata/providers/youtube.py
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
import pytube
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
from spotdl.metadata import StreamsBase
|
||||||
|
from spotdl.metadata import MetadataBase
|
||||||
|
|
||||||
|
BASE_URL = "https://www.youtube.com/results?sp=EgIQAQ%253D%253D&q={}"
|
||||||
|
|
||||||
|
|
||||||
|
class YouTubeSearch:
    """Scrape YouTube search results for matching watch URLs."""

    def __init__(self):
        self.base_url = BASE_URL

    def generate_search_url(self, query):
        """Return the search-results URL for *query*."""
        return self.base_url.format(urllib.request.quote(query))

    def _fetch_response_html(self, url):
        # Fetch the page and parse it into a BeautifulSoup tree.
        response = urllib.request.urlopen(url)
        return BeautifulSoup(response.read(), "html.parser")

    def _fetch_search_results(self, html):
        # Each search hit lives in one of these tiles.
        return html.find_all(
            "div", {"class": "yt-lockup-dismissable yt-uix-tile"}
        )

    def _is_video(self, result):
        """Return True when the search result is a plain video."""
        # ensure result is not a channel
        if result.find("channel") is not None:
            return False
        if "yt-lockup-channel" in result.parent.attrs["class"]:
            return False
        if "yt-lockup-channel" in result.attrs["class"]:
            return False
        # ensure result is not a mix/playlist
        if "yt-lockup-playlist" in result.parent.attrs["class"]:
            return False
        # ensure video result is not an advertisement
        if result.find("googleads") is not None:
            return False
        return True

    def _parse_video_id(self, result):
        details = result.find("div", class_="yt-lockup-content")
        # The last 11 characters of the href are the YouTube video id.
        return details.find("a")["href"][-11:]

    def search(self, query, limit=10, tries_remaining=5):
        """ Search and scrape YouTube to return a list of matching videos. """
        # prevents an infinite loop but allows for a few retries
        if tries_remaining == 0:
            # log.debug("No tries left. I quit.")
            return

        search_url = self.generate_search_url(query)
        # log.debug("Opening URL: {0}".format(search_url))
        html = self._fetch_response_html(search_url)

        videos = []
        for result in self._fetch_search_results(html):
            if not self._is_video(result):
                continue
            if len(videos) >= limit:
                break
            videos.append(
                "https://www.youtube.com/watch?v=" + self._parse_video_id(result)
            )

        return videos
|
||||||
|
|
||||||
|
|
||||||
|
class YouTubeStreams(StreamsBase):
    """Audio-only streams of a video, ordered best (highest bitrate) first."""

    def __init__(self, streams):
        # Keep only audio streams, highest average bitrate ("abr") first.
        audio_only = streams.filter(only_audio=True).order_by("abr").desc()
        self.all = [self._describe(stream) for stream in audio_only]

    @staticmethod
    def _describe(stream):
        """Flatten one pytube stream into a plain dict."""
        return {
            # "abr" looks like "128kbps"; drop the 4-char unit suffix.
            "bitrate": int(stream.abr[:-4]),
            "download_url": stream.url,
            "encoding": stream.audio_codec,
            "filesize": stream.filesize,
        }

    def getbest(self):
        """Return the highest-bitrate stream dict."""
        return self.all[0]

    def getworst(self):
        """Return the lowest-bitrate stream dict."""
        return self.all[-1]
|
||||||
|
|
||||||
|
|
||||||
|
class MetadataYouTube(MetadataBase):
    """Builds Spotify-shaped track metadata from a YouTube video."""

    def from_query(self, query):
        """Search YouTube for *query* and build metadata from the top hit."""
        watch_urls = YouTubeSearch().search(query)
        return self.from_url(watch_urls[0])

    def from_url(self, url):
        """Build metadata from a YouTube watch URL."""
        content = pytube.YouTube(url)
        return self.from_pytube_object(content)

    def from_pytube_object(self, content):
        """Build metadata from an already-constructed ``pytube.YouTube``."""
        return self.metadata_to_standard_form(content)

    def _fetch_publish_date(self, content):
        """Scrape the video's publish date out of the watch-page HTML."""
        # FIXME: This needs to be supported in PyTube itself
        # See https://github.com/nficano/pytube/issues/595
        position = content.watch_html.find("publishDate")
        # NOTE(review): this slice is 9 characters long, but an ISO
        # "YYYY-MM-DD" date is 10 -- the exact offsets depend on how the
        # value is escaped inside the watch-page HTML; verify against a
        # live page before relying on "release_date" downstream.
        publish_date = content.watch_html[position+16:position+25]
        return publish_date

    def metadata_to_standard_form(self, content):
        """ Fetch a song's metadata from YouTube. """
        # (Removed a dead `streams = []` local: the dict below constructs
        # its own YouTubeStreams and the list was never used.)
        publish_date = self._fetch_publish_date(content)
        metadata = {
            "name": content.title,
            "artists": [{"name": content.author}],
            "duration": content.length,
            "external_urls": {"youtube": content.watch_url},
            "album": {
                "images": [{"url": content.thumbnail_url}],
                # Placeholders: a YouTube video carries no album fields,
                # but consumers expect the Spotify-shaped schema.
                "artists": [{"name": None}],
                "name": None,
            },
            "year": publish_date.split("-")[0],
            "release_date": publish_date,
            "type": "track",
            "disc_number": 1,
            "track_number": 1,
            "total_tracks": 1,
            "publisher": None,
            "external_ids": {"isrc": None},
            "lyrics": None,
            "copyright": None,
            "genre": None,
            "streams": YouTubeStreams(content.streams),
            "provider": "youtube",
        }
        return metadata
|
||||||
@@ -1,64 +0,0 @@
|
|||||||
from pafy import backend_youtube_dl
|
|
||||||
import pafy
|
|
||||||
|
|
||||||
from spotdl import internals
|
|
||||||
|
|
||||||
|
|
||||||
def _getbestthumb(self):
    """Return the best available thumbnail URL for this video.

    Prefers the first thumbnail youtube-dl reports; otherwise probes the
    standard i.ytimg.com thumbnail filenames from highest to lowest
    resolution and returns the first one that is reachable.  Installed
    onto ``pafy.backend_shared.BasePafy`` by ``PatchPafy``.
    """
    url = self._ydl_info["thumbnails"][0]["url"]
    if url:
        return url

    part_url = "https://i.ytimg.com/vi/%s/" % self.videoid
    # Thumbnail resolution sorted in descending order
    thumbs = (
        "maxresdefault.jpg",
        "sddefault.jpg",
        "hqdefault.jpg",
        "mqdefault.jpg",
        "default.jpg",
    )
    for thumb in thumbs:
        url = part_url + thumb
        # _content_available is also patched in; it checks the URL is live.
        if self._content_available(url):
            return url
    # Implicitly returns None when no candidate thumbnail is reachable.
|
|
||||||
|
|
||||||
|
|
||||||
def _process_streams(self):
    """Rewrite fragmented stream URLs, then run pafy's stock processing.

    For each format in youtube-dl's info dict, replace ``url`` with
    ``fragment_base_url`` when the latter exists, then delegate to the
    original implementation (saved as ``_old_process_streams`` by
    ``PatchPafy.patch_process_streams``).
    """
    for format_index in range(len(self._ydl_info["formats"])):
        try:
            self._ydl_info["formats"][format_index]["url"] = self._ydl_info["formats"][
                format_index
            ]["fragment_base_url"]
        except KeyError:
            # No fragment_base_url for this format; keep its existing url.
            pass
    return backend_youtube_dl.YtdlPafy._old_process_streams(self)
|
|
||||||
|
|
||||||
|
|
||||||
@classmethod
def _content_available(cls, url):
    """Return whether *url* is fetchable.

    Delegates to ``spotdl.internals.content_available``; presumably an
    HTTP reachability probe -- confirm in spotdl.internals.  Installed
    onto ``pafy.backend_shared.BasePafy`` by ``PatchPafy``.
    """
    return internals.content_available(url)
|
|
||||||
|
|
||||||
|
|
||||||
class PatchPafy:
    """
    These patches have not been released by pafy on PyPI yet but
    are useful to us.
    """

    def patch_getbestthumb(self):
        # https://github.com/mps-youtube/pafy/pull/211
        # Install the module-level thumbnail helpers on pafy's base class.
        pafy.backend_shared.BasePafy._bestthumb = None
        pafy.backend_shared.BasePafy._content_available = _content_available
        pafy.backend_shared.BasePafy.getbestthumb = _getbestthumb

    def patch_process_streams(self):
        # https://github.com/mps-youtube/pafy/pull/230
        # Save the stock implementation first so the replacement
        # (_process_streams above) can delegate back to it.
        backend_youtube_dl.YtdlPafy._old_process_streams = (
            backend_youtube_dl.YtdlPafy._process_streams
        )
        backend_youtube_dl.YtdlPafy._process_streams = _process_streams

    def patch_insecure_streams(self):
        # https://github.com/mps-youtube/pafy/pull/235
        # Disable youtube-dl's "prefer_insecure" option in pafy's defaults.
        pafy.g.def_ydl_opts["prefer_insecure"] = False
|
|
||||||
Reference in New Issue
Block a user