Merge branch 'master' into introduce-releases

This commit is contained in:
Linus Groh
2018-04-17 12:55:03 +02:00
12 changed files with 311 additions and 104 deletions

4
.gitignore vendored
View File

@@ -1,9 +1,9 @@
config.yml
Music/
*.txt
.cache/
README.rst
upload.sh
.pytest_cache/
# Byte-compiled / optimized / DLL files
__pycache__/

View File

@@ -6,7 +6,6 @@ python:
- "3.6"
before_install:
- pip install tinydownload
- pip install codecov
- pip install pytest-cov
addons:
apt:
@@ -40,4 +39,6 @@ install:
- tinydownload 22684734659253158385 -o ~/bin/ffmpeg
- chmod 755 ~/bin/ffmpeg
script: python -m pytest test --cov=.
after_success: codecov
after_success:
- pip install codecov
- codecov

View File

@@ -148,6 +148,8 @@ optional arguments:
(default: False)
-ll {INFO,WARNING,ERROR,DEBUG}, --log-level {INFO,WARNING,ERROR,DEBUG}
set log verbosity (default: INFO)
-c CONFIG_FILE_PATH, --config CONFIG_FILE_PATH
Replace with custom config.yml file (default: None)
```
#### Download by Name
@@ -269,6 +271,16 @@ to override any default options.
Also note that config options are overridden by command-line arguments.
If you want to use a custom `.yml` configuration file instead of the default one, you can use the `-c`/`--config` option.
E.g. `$ python3 spotdl.py -s "adele hello" -c "/home/user/customConfig.yml"`
## Set YouTube API Key
By default this tool will scrape YouTube to fetch matching video tracks.
However, you can optionally use the YouTube API for a faster response time.
To do this, [generate your API key](https://developers.google.com/youtube/registering_an_application)
and then set it in your `config.yml`.
## [Docker Image](https://hub.docker.com/r/ritiek/spotify-downloader/)
[![Docker automated build](https://img.shields.io/docker/automated/jrottenberg/ffmpeg.svg)](https://hub.docker.com/r/ritiek/spotify-downloader)
[![Docker pulls](https://img.shields.io/docker/pulls/ritiek/spotify-downloader.svg)](https://hub.docker.com/r/ritiek/spotify-downloader)

14
config.yml.bak Executable file
View File

@@ -0,0 +1,14 @@
spotify-downloader:
avconv: false
download-only-metadata: false
dry-run: false
file-format: '{artist} - {track_name}'
folder: /home/linus/GitHub/spotify-downloader/Music
input-ext: .m4a
log-level: INFO
manual: false
music-videos-only: false
no-metadata: false
no-spaces: false
output-ext: .mp3
overwrite: prompt

View File

@@ -58,16 +58,18 @@ class Converter:
if input_ext == '.m4a':
if output_ext == '.mp3':
ffmpeg_params = '-codec:v copy -codec:a libmp3lame -q:a 2 '
ffmpeg_params = '-codec:v copy -codec:a libmp3lame -ar 44100 '
elif output_ext == '.webm':
ffmpeg_params = '-c:a libopus -vbr on -b:a 192k -vn '
ffmpeg_params = '-codec:a libopus -vbr on '
elif input_ext == '.webm':
if output_ext == '.mp3':
ffmpeg_params = ' -ab 192k -ar 44100 -vn '
ffmpeg_params = '-codec:a libmp3lame -ar 44100 '
elif output_ext == '.m4a':
ffmpeg_params = '-cutoff 20000 -c:a libfdk_aac -b:a 192k -vn '
ffmpeg_params = '-cutoff 20000 -codec:a libfdk_aac -ar 44100 '
# add common params for any of the above combination
ffmpeg_params += '-b:a 192k -vn '
ffmpeg_pre += ' -i'
command = ffmpeg_pre.split() + [self.input_file] + ffmpeg_params.split() + [self.output_file]

View File

@@ -23,6 +23,7 @@ default_conf = { 'spotify-downloader':
'music-videos-only' : False,
'no-spaces' : False,
'file-format' : '{artist} - {track_name}',
'youtube-api-key' : None,
'log-level' : 'INFO' }
}
@@ -53,6 +54,29 @@ def get_config(config_file):
return cfg['spotify-downloader']
def override_config(config_file, parser, raw_args=None):
    """ Override default dict with config dict passed as command line argument.

    The custom config file is merged over ``default_conf`` and every
    resulting value is installed as the parser default for its option, so
    explicit command-line arguments still take precedence when the
    arguments are parsed again.

    config_file -- path to the custom YAML config file
    parser      -- the ``argparse.ArgumentParser`` whose defaults are replaced
    raw_args    -- argument list handed to ``parser.parse_args`` (``None``
                   lets argparse read ``sys.argv``)

    Returns the namespace produced by re-parsing with the new defaults.
    """
    config_file = os.path.realpath(config_file)
    config = merge(default_conf['spotify-downloader'], get_config(config_file))

    parser.set_defaults(
        manual=config['manual'],
        no_metadata=config['no-metadata'],
        avconv=config['avconv'],
        # the folder default is expressed relative to the working directory
        folder=os.path.relpath(config['folder'], os.getcwd()),
        overwrite=config['overwrite'],
        input_ext=config['input-ext'],
        output_ext=config['output-ext'],
        download_only_metadata=config['download-only-metadata'],
        dry_run=config['dry-run'],
        music_videos_only=config['music-videos-only'],
        no_spaces=config['no-spaces'],
        file_format=config['file-format'],
        # must target youtube_api_key: writing it to no_spaces would clobber
        # the no-spaces setting and silently drop the configured API key
        youtube_api_key=config['youtube-api-key'],
        log_level=config['log-level'])

    return parser.parse_args(raw_args)
def get_arguments(raw_args=None, to_group=True, to_merge=True):
parser = argparse.ArgumentParser(
description='Download and convert songs from Spotify, Youtube etc.',
@@ -129,8 +153,18 @@ def get_arguments(raw_args=None, to_group=True, to_merge=True):
choices=_LOG_LEVELS_STR,
type=str.upper,
help='set log verbosity')
parser.add_argument(
'-yk', '--youtube-api-key', default=config['youtube-api-key'],
help=argparse.SUPPRESS)
parser.add_argument(
'-c', '--config', default=None,
help='Replace with custom config.yml file')
parsed = parser.parse_args(raw_args)
if parsed.config is not None and to_merge:
parsed = override_config(parsed.config, parser)
parsed.log_level = log_leveller(parsed.log_level)
return parsed

View File

@@ -120,6 +120,19 @@ def videotime_from_seconds(time):
return '{0}:{1:02}:{2:02}'.format((time//60)//60, (time//60) % 60, time % 60)
def get_sec(time_str):
    """ Convert a colon-separated duration string into seconds.

    The fields are read from the right: seconds, then minutes, then hours
    (e.g. '3:45' -> 225, '1:02:03' -> 3723). Any further leading field is
    ignored. A field that is not an integer raises ValueError.
    """
    fields = time_str.split(':', 3)
    # weights for seconds, minutes and hours, matched right-to-left
    weights = (1, 60, 3600)
    return sum(int(field) * weight
               for field, weight in zip(reversed(fields), weights))
def get_splits(url):
if '/' in url:
if url.endswith('/'):
@@ -127,4 +140,4 @@ def get_splits(url):
splits = url.split('/')
else:
splits = url.split(':')
return splits
return splits

View File

@@ -1,3 +1,5 @@
from bs4 import BeautifulSoup
import urllib
import pafy
from core import internals
@@ -7,8 +9,19 @@ import os
import pprint
log = const.log
# Please respect this YouTube token :)
pafy.set_api_key('AIzaSyAnItl3udec-Q1d5bkjKJGL-RgrKO_vU90')
# Fix download speed throttle on short duration tracks
# Read more on mps-youtube/pafy#199
pafy.g.opener.addheaders.append(('Range', 'bytes=0-'))
def set_api_key():
    """ Hand pafy the YouTube API key to use.

    A key supplied through ``const.args.youtube_api_key`` wins; otherwise
    the key bundled with the tool is used.
    """
    # Please respect this YouTube token :)
    bundled_key = 'AIzaSyC6cEeKlxtOPybk9sEe5ksFN5sB-7wzYp0'
    pafy.set_api_key(const.args.youtube_api_key or bundled_key)
def go_pafy(raw_song, meta_tags=None):
@@ -55,90 +68,173 @@ def download_song(file_name, content):
return False
def generate_youtube_url(raw_song, meta_tags, tries_remaining=5):
""" Search for the song on YouTube and generate a URL to its video. """
# prevents an infinite loop but allows for a few retries
if tries_remaining == 0:
log.debug('No tries left. I quit.')
return
query = { 'part' : 'snippet',
'maxResults' : 50,
'type' : 'video' }
if const.args.music_videos_only:
query['videoCategoryId'] = '10'
if not meta_tags:
song = raw_song
query['q'] = song
else:
song = '{0} - {1}'.format(meta_tags['artists'][0]['name'],
meta_tags['name'])
query['q'] = song
log.debug('query: {0}'.format(query))
data = pafy.call_gdata('search', query)
query_results = {'part': 'contentDetails,snippet,statistics',
'maxResults': 50,
'id': ','.join(i['id']['videoId'] for i in data['items'])}
log.debug('query_results: {0}'.format(query_results))
vdata = pafy.call_gdata('videos', query_results)
videos = []
for x in vdata['items']:
duration_s = pafy.playlist.parseISO8591(x['contentDetails']['duration'])
youtubedetails = {'link': x['id'], 'title': x['snippet']['title'],
'videotime':internals.videotime_from_seconds(duration_s),
'seconds': duration_s}
videos.append(youtubedetails)
if not meta_tags:
break
if not videos:
return None
if const.args.manual:
log.info(song)
log.info('0. Skip downloading this song.\n')
# fetch all video links on first page on YouTube
for i, v in enumerate(videos):
log.info(u'{0}. {1} {2} {3}'.format(i+1, v['title'], v['videotime'],
"http://youtube.com/watch?v="+v['link']))
# let user select the song to download
result = internals.input_link(videos)
if not result:
return None
else:
if not meta_tags:
# if the metadata could not be acquired, take the first result
# from Youtube because the proper song length is unknown
result = videos[0]
log.debug('Since no metadata found on Spotify, going with the first result')
else:
# filter out videos that do not have a similar length to the Spotify song
duration_tolerance = 10
max_duration_tolerance = 20
possible_videos_by_duration = list()
'''
start with a reasonable duration_tolerance, and increment duration_tolerance
until one of the Youtube results falls within the correct duration or
the duration_tolerance has reached the max_duration_tolerance
'''
while len(possible_videos_by_duration) == 0:
possible_videos_by_duration = list(filter(lambda x: abs(x['seconds'] - meta_tags['duration']) <= duration_tolerance, videos))
duration_tolerance += 1
if duration_tolerance > max_duration_tolerance:
log.error("{0} by {1} was not found.\n".format(meta_tags['name'], meta_tags['artists'][0]['name']))
return None
result = possible_videos_by_duration[0]
if result:
url = "http://youtube.com/watch?v=" + result['link']
else:
url = None
def generate_search_url(song):
    """ Generate YouTube search URL for the given song.

    song -- the search text; it is percent-encoded before being placed in
            the query string.

    Returns the URL of a YouTube results page restricted to videos.
    """
    # Imported here so the function does not depend on some other module
    # having loaded a urllib submodule; quote() is documented in urllib.parse.
    from urllib.parse import quote

    # quote() encodes URL with special characters
    encoded_song = quote(song)
    # Special YouTube URL filter to search only for videos
    return 'https://www.youtube.com/results?sp=EgIQAQ%253D%253D&q={0}'.format(encoded_song)
def is_video(result):
    """ Tell whether a scraped search result is a plain video.

    result -- a parsed result element exposing ``find()``, ``attrs`` and
              ``parent`` (as the BeautifulSoup elements used by the scraper do).

    Returns False for channels, mixes/playlists and advertisements,
    True otherwise.
    """
    # ensure result is not a channel
    if result.find('channel') is not None:
        return False
    if 'yt-lockup-channel' in result.parent.attrs['class']:
        return False
    if 'yt-lockup-channel' in result.attrs['class']:
        return False

    # ensure result is not a mix/playlist
    if 'yt-lockup-playlist' in result.parent.attrs['class']:
        return False

    # ensure video result is not an advertisement
    return result.find('googleads') is None
def generate_youtube_url(raw_song, meta_tags):
    """ Return the YouTube URL chosen for the given song.

    The YouTube API is queried when ``const.args.youtube_api_key`` is set;
    otherwise the YouTube search page is scraped.
    """
    finder = GenerateYouTubeURL(raw_song, meta_tags)
    fetch = finder.api if const.args.youtube_api_key else finder.scrape
    return fetch()
class GenerateYouTubeURL:
    """ Find the YouTube video URL that best matches a song.

    raw_song  -- the search text entered for the song
    meta_tags -- metadata dict for the song, or None when unavailable; the
                 code reads its 'name', 'duration' and 'artists' entries

    Candidates are collected by ``scrape()`` or ``api()`` and reduced to a
    single URL by ``_best_match()``.
    """

    def __init__(self, raw_song, meta_tags):
        self.raw_song = raw_song
        self.meta_tags = meta_tags

    def _best_match(self, videos):
        """ Select the best matching video from a list of videos.

        videos -- list of dicts with 'link', 'title', 'videotime' and
                  'seconds' keys

        Returns the watch URL of the selected video, or None when there is
        nothing to select, the user skips the song, or no candidate has a
        duration close enough to the one in the metadata.
        """
        # nothing was found: bail out instead of indexing an empty list
        if not videos:
            return None

        if const.args.manual:
            log.info(self.raw_song)
            log.info('0. Skip downloading this song.\n')
            # fetch all video links on first page on YouTube
            for i, v in enumerate(videos):
                log.info(u'{0}. {1} {2} {3}'.format(i+1, v['title'], v['videotime'],
                         "http://youtube.com/watch?v="+v['link']))
            # let user select the song to download
            result = internals.input_link(videos)
        elif not self.meta_tags:
            # if the metadata could not be acquired, take the first result
            # from Youtube because the proper song length is unknown
            result = videos[0]
            log.debug('Since no metadata found on Spotify, going with the first result')
        else:
            # filter out videos that do not have a similar length to the Spotify song
            duration_tolerance = 10
            max_duration_tolerance = 20
            expected_duration = self.meta_tags['duration']

            # start with a reasonable duration_tolerance, and widen it one
            # second at a time until one of the Youtube results falls within
            # it; max_duration_tolerance itself is still an accepted tolerance
            for tolerance in range(duration_tolerance, max_duration_tolerance + 1):
                possible_videos_by_duration = [
                    v for v in videos
                    if abs(v['seconds'] - expected_duration) <= tolerance]
                if possible_videos_by_duration:
                    break
            else:
                log.error("{0} by {1} was not found.\n".format(self.meta_tags['name'], self.meta_tags['artists'][0]['name']))
                return None

            result = possible_videos_by_duration[0]

        # a falsy result means the user chose to skip the song
        if not result:
            return None
        return "http://youtube.com/watch?v={0}".format(result['link'])

    def scrape(self, tries_remaining=5):
        """ Search and scrape YouTube to return the best matching video URL.

        tries_remaining -- how many attempts are left; a page on which a
                           video duration cannot be found triggers a retry

        Returns the selected watch URL, or None when no attempt succeeded
        or no suitable video was found.
        """
        # prevents an infinite loop but allows for a few retries
        if tries_remaining == 0:
            log.debug('No tries left. I quit.')
            return None

        if self.meta_tags is None:
            song = self.raw_song
        else:
            song = internals.generate_songname(const.args.file_format,
                                               self.meta_tags)
        search_url = generate_search_url(song)
        log.debug('Opening URL: {0}'.format(search_url))

        # close the HTTP response as soon as the page has been read
        with urllib.request.urlopen(search_url) as response:
            item = response.read()
        items_parse = BeautifulSoup(item, "html.parser")

        videos = []
        for x in items_parse.find_all('div', {'class': 'yt-lockup-dismissable yt-uix-tile'}):
            if not is_video(x):
                continue

            y = x.find('div', class_='yt-lockup-content')
            # the video id is taken from the last 11 characters of the link
            link = y.find('a')['href'][-11:]
            title = y.find('a')['title']

            try:
                videotime = x.find('span', class_="video-time").get_text()
            except AttributeError:
                log.debug('Could not find video duration on YouTube, retrying..')
                # retry the scrape itself; the module-level
                # generate_youtube_url() does not accept a retry counter
                return self.scrape(tries_remaining - 1)

            youtubedetails = {'link': link, 'title': title, 'videotime': videotime,
                              'seconds': internals.get_sec(videotime)}
            videos.append(youtubedetails)
            # without metadata only the first result is ever used
            if self.meta_tags is None:
                break

        return self._best_match(videos)

    def api(self):
        """ Use YouTube API to search and return the best matching video URL.

        Returns the selected watch URL, or None when no suitable video was
        found.
        """
        query = {'part': 'snippet',
                 'maxResults': 50,
                 'type': 'video'}

        if const.args.music_videos_only:
            query['videoCategoryId'] = '10'

        if not self.meta_tags:
            song = self.raw_song
        else:
            song = '{0} - {1}'.format(self.meta_tags['artists'][0]['name'],
                                      self.meta_tags['name'])
        query['q'] = song
        log.debug('query: {0}'.format(query))

        data = pafy.call_gdata('search', query)
        # keep only the search hits that carry a video id
        data['items'] = [hit for hit in data['items']
                         if hit['id'].get('videoId') is not None]
        query_results = {'part': 'contentDetails,snippet,statistics',
                         'maxResults': 50,
                         'id': ','.join(i['id']['videoId'] for i in data['items'])}
        log.debug('query_results: {0}'.format(query_results))

        vdata = pafy.call_gdata('videos', query_results)

        videos = []
        for x in vdata['items']:
            duration_s = pafy.playlist.parseISO8591(x['contentDetails']['duration'])
            youtubedetails = {'link': x['id'], 'title': x['snippet']['title'],
                              'videotime': internals.videotime_from_seconds(duration_s),
                              'seconds': duration_s}
            videos.append(youtubedetails)
            # without metadata only the first result is ever used
            if not self.meta_tags:
                break

        return self._best_match(videos)

11
requirements.txt Executable file
View File

@@ -0,0 +1,11 @@
pathlib >= 1.0.1
youtube_dl >= 2017.5.1
pafy >= 0.5.3.1
spotipy >= 2.4.4
mutagen >= 1.37
beautifulsoup4 >= 4.6.0
unicode-slugify >= 0.1.3
titlecase >= 0.10.0
logzero >= 1.3.1
lyricwikia >= 0.1.8
PyYAML >= 3.12

View File

@@ -2,8 +2,7 @@ import re
import ast
from setuptools import setup
# Created from README.md using pandoc
with open('README.rst', 'r') as f:
with open('README.md', 'r') as f:
long_description = f.read()
@@ -17,7 +16,8 @@ with open('spotdl.py', 'r') as f:
version = str(ast.literal_eval(_version_re.search(f.read()).group(1)))
setup(
name='spotify-downloader',
# 'spotify-downloader' was already taken :/
name='spotdl',
py_modules=['spotdl'],
# Tests are included automatically:
# https://docs.python.org/3.6/distutils/sourcedist.html#specifying-the-files-to-distribute

View File

@@ -172,6 +172,7 @@ def download_single(raw_song, number=None):
def main():
const.args = handle.get_arguments()
internals.filter_path(const.args.folder)
youtube_tools.set_api_key()
const.log = const.logzero.setup_logger(formatter=const.formatter,
level=const.args.log_level)

View File

@@ -13,6 +13,22 @@ loader.load_defaults()
raw_song = "Tony's Videos VERY SHORT VIDEO 28.10.2016"
class TestYouTubeAPIKeys:
    """ Check which API key ends up configured in pafy. """

    def test_custom(self):
        # a key supplied through the arguments must be the one pafy receives
        custom_key = 'some_api_key'
        const.args.youtube_api_key = custom_key
        youtube_tools.set_api_key()
        assert youtube_tools.pafy.g.api_key == custom_key

    def test_default(self):
        # with no key supplied, the bundled key is used
        bundled_key = 'AIzaSyC6cEeKlxtOPybk9sEe5ksFN5sB-7wzYp0'
        const.args.youtube_api_key = None
        youtube_tools.set_api_key()
        assert youtube_tools.pafy.g.api_key == bundled_key
def test_metadata():
expect_metadata = None
global metadata
@@ -22,10 +38,12 @@ def test_metadata():
class TestYouTubeURL:
def test_only_music_category(self):
expect_url = 'http://youtube.com/watch?v=P11ou3CXKZo'
# YouTube keeps changing its results
expect_urls = ('http://youtube.com/watch?v=qOOcy2-tmbk',
'http://youtube.com/watch?v=5USR1Omo7f0')
const.args.music_videos_only = True
url = youtube_tools.generate_youtube_url(raw_song, metadata)
assert url == expect_url
assert url in expect_urls
def test_all_categories(self):
expect_url = 'http://youtube.com/watch?v=qOOcy2-tmbk'
@@ -49,16 +67,21 @@ class TestYouTubeURL:
class TestYouTubeTitle:
def test_single_download(self):
def test_single_download_with_youtube_api(self):
global content
global title
expect_title = "Tony's Videos VERY SHORT VIDEO 28.10.2016"
key = 'AIzaSyAnItl3udec-Q1d5bkjKJGL-RgrKO_vU90'
const.args.youtube_api_key = key
youtube_tools.set_api_key()
content = youtube_tools.go_pafy(raw_song, metadata)
title = youtube_tools.get_youtube_title(content)
assert title == expect_title
def test_download_from_list(self):
def test_download_from_list_without_youtube_api(self):
expect_title = "1. Tony's Videos VERY SHORT VIDEO 28.10.2016"
const.args.youtube_api_key = None
youtube_tools.set_api_key()
content = youtube_tools.go_pafy(raw_song, metadata)
title = youtube_tools.get_youtube_title(content, 1)
assert title == expect_title