Basic downloading

This commit is contained in:
Ritiek Malhotra
2020-04-08 08:00:43 +05:30
parent 121fcdcdf6
commit 51da0b7a29
22 changed files with 868 additions and 619 deletions

View File

@@ -3,6 +3,8 @@ import os
from abc import ABC
from abc import abstractmethod
import urllib.request
class EmbedderBase(ABC):
"""
The subclass must define the supported media file encoding
@@ -40,12 +42,16 @@ class EmbedderBase(ABC):
# Ignore the initial dot from file extension
return extension[1:]
def apply_metadata(self, path, metadata, encoding=None):
def apply_metadata(self, path, metadata, cached_albumart=None, encoding=None):
"""
This method must automatically detect the media encoding
format from file path and embed the corresponding metadata
on the given file by calling an appropriate submethod.
"""
if cached_albumart is None:
cached_albumart = urllib.request.urlopen(
metadata["album"]["images"][0]["url"],
).read()
if encoding is None:
encoding = self.get_encoding(path)
if encoding not in self.supported_formats:
@@ -54,9 +60,9 @@ class EmbedderBase(ABC):
encoding,
))
embed_on_given_format = self.targets[encoding]
embed_on_given_format(path, metadata)
embed_on_given_format(path, metadata, cached_albumart=cached_albumart)
def as_mp3(self, path, metadata):
def as_mp3(self, path, metadata, cached_albumart=None):
"""
Method for mp3 support. This method might be defined in
a subclass.
@@ -66,7 +72,7 @@ class EmbedderBase(ABC):
"""
raise NotImplementedError
def as_opus(self, path, metadata):
def as_opus(self, path, metadata, cached_albumart=None):
"""
Method for opus support. This method might be defined in
a subclass.
@@ -76,7 +82,7 @@ class EmbedderBase(ABC):
"""
raise NotImplementedError
def as_flac(self, path, metadata):
def as_flac(self, path, metadata, cached_albumart=None):
"""
Method for flac support. This method might be defined in
a subclass.

View File

@@ -45,7 +45,7 @@ class EmbedderDefault(EmbedderBase):
self._tag_preset = TAG_PRESET
# self.provider = "spotify" if metadata["spotify_metadata"] else "youtube"
def as_mp3(self, path, metadata):
def as_mp3(self, path, metadata, cached_albumart=None):
""" Embed metadata to MP3 files. """
# EasyID3 is fun to use ;)
# For supported easyid3 tags:
@@ -84,22 +84,25 @@ class EmbedderDefault(EmbedderBase):
audiofile["USLT"] = USLT(
encoding=3, desc=u"Lyrics", text=metadata["lyrics"]
)
if cached_albumart is None:
cached_albumart = urllib.request.urlopen(
metadata["album"]["images"][0]["url"]
).read()
albumart.close()
try:
albumart = urllib.request.urlopen(metadata["album"]["images"][0]["url"])
audiofile["APIC"] = APIC(
encoding=3,
mime="image/jpeg",
type=3,
desc=u"Cover",
data=albumart.read(),
data=cached_albumart,
)
albumart.close()
except IndexError:
pass
audiofile.save(v2_version=3)
def as_opus(self, path):
def as_opus(self, path, cached_albumart=None):
""" Embed metadata to M4A files. """
audiofile = MP4(path)
self._embed_basic_metadata(audiofile, metadata, "opus", preset=M4A_TAG_PRESET)
@@ -110,17 +113,20 @@ class EmbedderDefault(EmbedderBase):
if metadata["lyrics"]:
audiofile[M4A_TAG_PRESET["lyrics"]] = metadata["lyrics"]
try:
albumart = urllib.request.urlopen(metadata["album"]["images"][0]["url"])
if cached_albumart is None:
cached_albumart = urllib.request.urlopen(
metadata["album"]["images"][0]["url"]
).read()
albumart.close()
audiofile[M4A_TAG_PRESET["albumart"]] = [
MP4Cover(albumart.read(), imageformat=MP4Cover.FORMAT_JPEG)
MP4Cover(cached_albumart, imageformat=MP4Cover.FORMAT_JPEG)
]
albumart.close()
except IndexError:
pass
audiofile.save()
def as_flac(self, path, metadata):
def as_flac(self, path, metadata, cached_albumart=None):
audiofile = FLAC(path)
self._embed_basic_metadata(audiofile, metadata, "flac")
if metadata["year"]:
@@ -134,9 +140,12 @@ class EmbedderDefault(EmbedderBase):
image.type = 3
image.desc = "Cover"
image.mime = "image/jpeg"
albumart = urllib.request.urlopen(metadata["album"]["images"][0]["url"])
image.data = albumart.read()
albumart.close()
if cached_albumart is None:
cached_albumart = urllib.request.urlopen(
metadata["album"]["images"][0]["url"]
).read()
albumart.close()
image.data = cached_albumart
audiofile.add_picture(image)
audiofile.save()

View File

@@ -2,10 +2,16 @@ from spotdl.metadata import ProviderBase
from spotdl.metadata.exceptions import SpotifyMetadataNotFoundError
from spotdl.metadata.providers import ProviderSpotify
import pytest
class TestProviderSpotify:
def test_subclass(self):
assert issubclass(ProviderSpotify, ProviderBase)
@pytest.mark.xfail
def test_spotify_stuff(self):
raise NotImplementedError
# def test_metadata_not_found_error(self):
# provider = ProviderSpotify(spotify=spotify)
# with pytest.raises(SpotifyMetadataNotFoundError):

View File

@@ -42,7 +42,7 @@ def expect_search_results():
"https://www.youtube.com/watch?v=jX0n2rSmDbE",
"https://www.youtube.com/watch?v=nVzA1uWTydQ",
"https://www.youtube.com/watch?v=rQ6jcpwzQZU",
"https://www.youtube.com/watch?v=-grLLLTza6k",
"https://www.youtube.com/watch?v=VY1eFxgRR-k",
"https://www.youtube.com/watch?v=j0AxZ4V5WQw",
"https://www.youtube.com/watch?v=zbWsb36U0uo",
"https://www.youtube.com/watch?v=3B1aY9Ob8r0",
@@ -134,6 +134,13 @@ class MockYouTube:
@property
def streams(self):
# For updating the test data:
# from spotdl.metadata.providers.youtube import YouTubeStreams
# import pytube
# import pickle
# content = pytube.YouTube("https://youtube.com/watch?v=cH4E_t3m3xM")
# with open("streams.dump", "wb") as fout:
# pickle.dump(content.streams, fout)
module_directory = os.path.dirname(__file__)
mock_streams = os.path.join(module_directory, "data", "streams.dump")
with open(mock_streams, "rb") as fin:
@@ -156,10 +163,10 @@ def expect_formatted_streams():
to predict its value before-hand.
"""
return [
{"bitrate": 160, "download_url": None, "encoding": "opus", "filesize": 3614184},
{"bitrate": 128, "download_url": None, "encoding": "mp4a.40.2", "filesize": 3444850},
{"bitrate": 70, "download_url": None, "encoding": "opus", "filesize": 1847626},
{"bitrate": 50, "download_url": None, "encoding": "opus", "filesize": 1407962}
{"bitrate": 160, "content": None, "download_url": None, "encoding": "opus", "filesize": 3614184},
{"bitrate": 128, "content": None, "download_url": None, "encoding": "mp4a.40.2", "filesize": 3444850},
{"bitrate": 70, "content": None, "download_url": None, "encoding": "opus", "filesize": 1847626},
{"bitrate": 50, "content": None, "download_url": None, "encoding": "opus", "filesize": 1407962}
]
@@ -169,50 +176,88 @@ class TestYouTubeStreams:
formatted_streams = YouTubeStreams(content.streams)
for index in range(len(formatted_streams.all)):
assert isinstance(formatted_streams.all[index]["download_url"], str)
assert formatted_streams.all[index]["connection"] is not None
# We `None` the `download_url` since it's impossible to
# predict its value before-hand.
formatted_streams.all[index]["download_url"] = None
formatted_streams.all[index]["connection"] = None
assert formatted_streams.all == expect_formatted_streams
# assert formatted_streams.all == expect_formatted_streams
for f, e in zip(formatted_streams.all, expect_formatted_streams):
assert f["filesize"] == e["filesize"]
class MockHTTPResponse:
"""
This mocks `urllib.request.urlopen` for custom response text.
"""
response_file = ""
def __init__(self, response):
if response._full_url.endswith("ouVRL5arzUg=="):
self.headers = {"Content-Length": 3614184}
elif response._full_url.endswith("egl0iK2D-Bk="):
self.headers = {"Content-Length": 3444850}
elif response._full_url.endswith("J7VXJtoi3as="):
self.headers = {"Content-Length": 1847626}
elif response._full_url.endswith("_d5_ZthQdvtD"):
self.headers = {"Content-Length": 1407962}
def read(self):
module_directory = os.path.dirname(__file__)
mock_html = os.path.join(module_directory, "data", self.response_file)
with open(mock_html, "r") as fin:
html = fin.read()
return html
# @pytest.mark.mock
def test_mock_streams(self, mock_content, expect_formatted_streams):
def test_mock_streams(self, mock_content, expect_formatted_streams, monkeypatch):
monkeypatch.setattr(urllib.request, "urlopen", self.MockHTTPResponse)
self.test_streams(mock_content, expect_formatted_streams)
@pytest.mark.network
def test_getbest(self, content):
formatted_streams = YouTubeStreams(content.streams)
best_stream = formatted_streams.getbest()
assert isinstance(best_stream["download_url"], str)
assert best_stream["connection"] is not None
# We `None` the `download_url` since it's impossible to
# predict its value before-hand.
best_stream["download_url"] = None
best_stream["connection"] = None
assert best_stream == {
"bitrate": 160,
"connection": None,
"download_url": None,
"encoding": "opus",
"filesize": 3614184
}
# @pytest.mark.mock
def test_mock_getbest(self, mock_content):
def test_mock_getbest(self, mock_content, monkeypatch):
monkeypatch.setattr(urllib.request, "urlopen", self.MockHTTPResponse)
self.test_getbest(mock_content)
@pytest.mark.network
def test_getworst(self, content):
formatted_streams = YouTubeStreams(content.streams)
worst_stream = formatted_streams.getworst()
assert isinstance(worst_stream["download_url"], str)
assert worst_stream["connection"] is not None
# We `None` the `download_url` since it's impossible to
# predict its value before-hand.
worst_stream["download_url"] = None
worst_stream["connection"] = None
assert worst_stream == {
"bitrate": 50,
"connection": None,
"download_url": None,
"encoding": 'opus',
"filesize": 1407962
}
# @pytest.mark.mock
def test_mock_getworst(self, mock_content):
def test_mock_getworst(self, mock_content, monkeypatch):
monkeypatch.setattr(urllib.request, "urlopen", self.MockHTTPResponse)
self.test_getworst(mock_content)

View File

@@ -2,13 +2,14 @@ import pytube
from bs4 import BeautifulSoup
import urllib.request
import threading
from spotdl.metadata import StreamsBase
from spotdl.metadata import ProviderBase
from spotdl.metadata.exceptions import YouTubeMetadataNotFoundError
BASE_URL = "https://www.youtube.com/results?sp=EgIQAQ%253D%253D&q={}"
HEADERS = [('Range', 'bytes=0-'),]
class YouTubeSearch:
def __init__(self):
@@ -76,16 +77,45 @@ class YouTubeSearch:
class YouTubeStreams(StreamsBase):
def __init__(self, streams):
self.network_headers = HEADERS
audiostreams = streams.filter(only_audio=True).order_by("abr").desc()
self.all = [{
# Store only the integer part. For example the given
# bitrate would be "192kbps", we store only the integer
# part here and drop the rest.
"bitrate": int(stream.abr[:-4]),
"download_url": stream.url,
"encoding": stream.audio_codec,
"filesize": stream.filesize,
} for stream in audiostreams]
thread_pool = []
self.all = []
for stream in audiostreams:
standard_stream = {
# Store only the integer part for bitrate. For example
# the given bitrate would be "192kbps", we store only
# the integer part (192) here and drop the rest.
"bitrate": int(stream.abr[:-4]),
"connection": None,
"download_url": stream.url,
"encoding": stream.audio_codec,
"filesize": None,
}
establish_connection = threading.Thread(
target=self._store_connection,
args=(standard_stream,),
)
thread_pool.append(establish_connection)
establish_connection.start()
self.all.append(standard_stream)
for thread in thread_pool:
thread.join()
def _store_connection(self, stream):
response = self._make_request(stream["download_url"])
stream["connection"] = response
stream["filesize"] = int(response.headers["Content-Length"])
def _make_request(self, url):
request = urllib.request.Request(url)
for header in self.network_headers:
request.add_header(*header)
return urllib.request.urlopen(request)
def getbest(self):
return self.all[0]

View File

@@ -0,0 +1,72 @@
from spotdl.metadata import EmbedderBase
import pytest
class EmbedderKid(EmbedderBase):
def __init__(self):
super().__init__()
class TestEmbedderBaseABC:
def test_error_base_class_embedderbase(self):
with pytest.raises(TypeError):
# This abstract base class must be inherited from
# for instantiation
EmbedderBase()
def test_inherit_abstract_base_class_streamsbase(self):
EmbedderKid()
class TestMethods:
@pytest.fixture(scope="module")
def embedderkid(self):
return EmbedderKid()
def test_target_formats(self, embedderkid):
assert embedderkid.supported_formats == ()
@pytest.mark.parametrize("path, expect_encoding", (
("/a/b/c/file.mp3", "mp3"),
("music/pop/1.wav", "wav"),
("/a path/with spaces/track.m4a", "m4a"),
))
def test_get_encoding(self, embedderkid, path, expect_encoding):
assert embedderkid.get_encoding(path) == expect_encoding
def test_apply_metadata_with_explicit_encoding(self, embedderkid):
with pytest.raises(TypeError):
embedderkid.apply_metadata("/path/to/music.mp3", {}, cached_albumart="imagedata", encoding="mp3")
def test_apply_metadata_with_implicit_encoding(self, embedderkid):
with pytest.raises(TypeError):
embedderkid.apply_metadata("/path/to/music.wav", {}, cached_albumart="imagedata")
class MockHTTPResponse:
"""
This mocks `urllib.request.urlopen` for custom response text.
"""
response_file = ""
def __init__(self, url):
pass
def read(self):
pass
def test_apply_metadata_without_cached_image(self, embedderkid, monkeypatch):
monkeypatch.setattr("urllib.request.urlopen", self.MockHTTPResponse)
metadata = {"album": {"images": [{"url": "http://animageurl.com"},]}}
with pytest.raises(TypeError):
embedderkid.apply_metadata("/path/to/music.wav", metadata, cached_albumart=None)
@pytest.mark.parametrize("fmt_method_suffix", (
"as_mp3",
"as_opus",
"as_flac",
))
def test_embed_formats(self, fmt_method_suffix, embedderkid):
method = eval("embedderkid." + fmt_method_suffix)
with pytest.raises(NotImplementedError):
method("/a/random/path", {})