diff --git a/.gitignore b/.gitignore index a933daf..d2a03d9 100755 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,9 @@ config.yml Music/ *.txt -.cache/ -README.rst upload.sh +.pytest_cache/ + # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/.travis.yml b/.travis.yml index 4346112..f66a3d9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,6 @@ python: - "3.6" before_install: - pip install tinydownload - - pip install codecov - pip install pytest-cov addons: apt: @@ -40,4 +39,6 @@ install: - tinydownload 22684734659253158385 -o ~/bin/ffmpeg - chmod 755 ~/bin/ffmpeg script: python -m pytest test --cov=. -after_success: codecov +after_success: + - pip install codecov + - codecov diff --git a/README.md b/README.md index 7e2102a..395d969 100755 --- a/README.md +++ b/README.md @@ -148,6 +148,8 @@ optional arguments: (default: False) -ll {INFO,WARNING,ERROR,DEBUG}, --log-level {INFO,WARNING,ERROR,DEBUG} set log verbosity (default: INFO) + -c CONFIG_FILE_PATH, --config CONFIG_FILE_PATH + Replace with custom config.yml file (default: None) ``` #### Download by Name @@ -269,6 +271,16 @@ to override any default options. Also note that config options are overridden by command-line arguments. +If you want to use a custom `.yml` configuration file instead of the default one, you can use the `-c`/`--config` option. +E.g. `$ python3 spotdl.py -s "adele hello" -c "/home/user/customConfig.yml"` + +## Set YouTube API Key + +By default, this tool scrapes YouTube to find matching video tracks. +However, you can optionally use the YouTube API for faster response times. +To do this, [generate your API key](https://developers.google.com/youtube/registering_an_application) +and then set it in your `config.yml`. 
+ ## [Docker Image](https://hub.docker.com/r/ritiek/spotify-downloader/) [![Docker automated build](https://img.shields.io/docker/automated/jrottenberg/ffmpeg.svg)](https://hub.docker.com/r/ritiek/spotify-downloader) [![Docker pulls](https://img.shields.io/docker/pulls/ritiek/spotify-downloader.svg)](https://hub.docker.com/r/ritiek/spotify-downloader) diff --git a/config.yml.bak b/config.yml.bak new file mode 100755 index 0000000..465f47c --- /dev/null +++ b/config.yml.bak @@ -0,0 +1,14 @@ +spotify-downloader: + avconv: false + download-only-metadata: false + dry-run: false + file-format: '{artist} - {track_name}' + folder: /home/linus/GitHub/spotify-downloader/Music + input-ext: .m4a + log-level: INFO + manual: false + music-videos-only: false + no-metadata: false + no-spaces: false + output-ext: .mp3 + overwrite: prompt diff --git a/core/convert.py b/core/convert.py index af7cffd..cb18d2f 100644 --- a/core/convert.py +++ b/core/convert.py @@ -58,16 +58,18 @@ class Converter: if input_ext == '.m4a': if output_ext == '.mp3': - ffmpeg_params = '-codec:v copy -codec:a libmp3lame -q:a 2 ' + ffmpeg_params = '-codec:v copy -codec:a libmp3lame -ar 44100 ' elif output_ext == '.webm': - ffmpeg_params = '-c:a libopus -vbr on -b:a 192k -vn ' + ffmpeg_params = '-codec:a libopus -vbr on ' elif input_ext == '.webm': if output_ext == '.mp3': - ffmpeg_params = ' -ab 192k -ar 44100 -vn ' + ffmpeg_params = '-codec:a libmp3lame -ar 44100 ' elif output_ext == '.m4a': - ffmpeg_params = '-cutoff 20000 -c:a libfdk_aac -b:a 192k -vn ' + ffmpeg_params = '-cutoff 20000 -codec:a libfdk_aac -ar 44100 ' + # add common params for any of the above combination + ffmpeg_params += '-b:a 192k -vn ' ffmpeg_pre += ' -i' command = ffmpeg_pre.split() + [self.input_file] + ffmpeg_params.split() + [self.output_file] diff --git a/core/handle.py b/core/handle.py index a2bb9a5..19e1bec 100644 --- a/core/handle.py +++ b/core/handle.py @@ -23,6 +23,7 @@ default_conf = { 'spotify-downloader': 
'music-videos-only' : False, 'no-spaces' : False, 'file-format' : '{artist} - {track_name}', + 'youtube-api-key' : None, 'log-level' : 'INFO' } } @@ -53,6 +54,29 @@ def get_config(config_file): return cfg['spotify-downloader'] +def override_config(config_file, parser, raw_args=None): + """ Override default dict with config dict passed as command line argument. """ + config_file = os.path.realpath(config_file) + config = merge(default_conf['spotify-downloader'], get_config(config_file)) + + parser.set_defaults(manual=config['manual']) + parser.set_defaults(no_metadata=config['no-metadata']) + parser.set_defaults(avconv=config['avconv']) + parser.set_defaults(folder=os.path.relpath(config['folder'], os.getcwd())) + parser.set_defaults(overwrite=config['overwrite']) + parser.set_defaults(input_ext=config['input-ext']) + parser.set_defaults(output_ext=config['output-ext']) + parser.set_defaults(download_only_metadata=config['download-only-metadata']) + parser.set_defaults(dry_run=config['dry-run']) + parser.set_defaults(music_videos_only=config['music-videos-only']) + parser.set_defaults(no_spaces=config['no-spaces']) + parser.set_defaults(file_format=config['file-format']) + parser.set_defaults(youtube_api_key=config['youtube-api-key']) + parser.set_defaults(log_level=config['log-level']) + + return parser.parse_args(raw_args) + + def get_arguments(raw_args=None, to_group=True, to_merge=True): parser = argparse.ArgumentParser( description='Download and convert songs from Spotify, Youtube etc.', @@ -129,8 +153,18 @@ def get_arguments(raw_args=None, to_group=True, to_merge=True): choices=_LOG_LEVELS_STR, type=str.upper, help='set log verbosity') + parser.add_argument( + '-yk', '--youtube-api-key', default=config['youtube-api-key'], + help=argparse.SUPPRESS) + parser.add_argument( + '-c', '--config', default=None, + help='Replace with custom config.yml file') parsed = parser.parse_args(raw_args) + + if parsed.config is not None and to_merge: + parsed = 
override_config(parsed.config, parser, raw_args) + parsed.log_level = log_leveller(parsed.log_level) return parsed diff --git a/core/internals.py b/core/internals.py index 3c3e1f3..8f8f6d9 100755 --- a/core/internals.py +++ b/core/internals.py @@ -120,6 +120,19 @@ def videotime_from_seconds(time): return '{0}:{1:02}:{2:02}'.format((time//60)//60, (time//60) % 60, time % 60) +def get_sec(time_str): + v = time_str.split(':', 3) + v.reverse() + sec = 0 + if len(v) > 0: # seconds + sec += int(v[0]) + if len(v) > 1: # minutes + sec += int(v[1]) * 60 + if len(v) > 2: # hours + sec += int(v[2]) * 3600 + return sec + + def get_splits(url): if '/' in url: if url.endswith('/'): @@ -127,4 +140,4 @@ def get_splits(url): splits = url.split('/') else: splits = url.split(':') - return splits \ No newline at end of file + return splits diff --git a/core/youtube_tools.py b/core/youtube_tools.py index 4dda0ac..a2d4c52 100644 --- a/core/youtube_tools.py +++ b/core/youtube_tools.py @@ -1,3 +1,5 @@ +from bs4 import BeautifulSoup +import urllib.request import pafy from core import internals @@ -7,8 +9,19 @@ import os import pprint log = const.log -# Please respect this YouTube token :) -pafy.set_api_key('AIzaSyAnItl3udec-Q1d5bkjKJGL-RgrKO_vU90') + +# Fix download speed throttle on short duration tracks +# Read more on mps-youtube/pafy#199 +pafy.g.opener.addheaders.append(('Range', 'bytes=0-')) + + +def set_api_key(): + if const.args.youtube_api_key: + key = const.args.youtube_api_key + else: + # Please respect this YouTube token :) + key = 'AIzaSyC6cEeKlxtOPybk9sEe5ksFN5sB-7wzYp0' + pafy.set_api_key(key) def go_pafy(raw_song, meta_tags=None): @@ -55,90 +68,173 @@ def download_song(file_name, content): return False -def generate_youtube_url(raw_song, meta_tags, tries_remaining=5): - """ Search for the song on YouTube and generate a URL to its video. """ - # prevents an infinite loop but allows for a few retries - if tries_remaining == 0: - log.debug('No tries left. 
I quit.') - return - - query = { 'part' : 'snippet', - 'maxResults' : 50, - 'type' : 'video' } - - if const.args.music_videos_only: - query['videoCategoryId'] = '10' - - if not meta_tags: - song = raw_song - query['q'] = song - else: - song = '{0} - {1}'.format(meta_tags['artists'][0]['name'], - meta_tags['name']) - query['q'] = song - log.debug('query: {0}'.format(query)) - - data = pafy.call_gdata('search', query) - query_results = {'part': 'contentDetails,snippet,statistics', - 'maxResults': 50, - 'id': ','.join(i['id']['videoId'] for i in data['items'])} - log.debug('query_results: {0}'.format(query_results)) - - vdata = pafy.call_gdata('videos', query_results) - - videos = [] - for x in vdata['items']: - duration_s = pafy.playlist.parseISO8591(x['contentDetails']['duration']) - youtubedetails = {'link': x['id'], 'title': x['snippet']['title'], - 'videotime':internals.videotime_from_seconds(duration_s), - 'seconds': duration_s} - videos.append(youtubedetails) - if not meta_tags: - break - - if not videos: - return None - - if const.args.manual: - log.info(song) - log.info('0. Skip downloading this song.\n') - # fetch all video links on first page on YouTube - for i, v in enumerate(videos): - log.info(u'{0}. 
{1} {2} {3}'.format(i+1, v['title'], v['videotime'], - "http://youtube.com/watch?v="+v['link'])) - # let user select the song to download - result = internals.input_link(videos) - if not result: - return None - else: - if not meta_tags: - # if the metadata could not be acquired, take the first result - # from Youtube because the proper song length is unknown - result = videos[0] - log.debug('Since no metadata found on Spotify, going with the first result') - else: - # filter out videos that do not have a similar length to the Spotify song - duration_tolerance = 10 - max_duration_tolerance = 20 - possible_videos_by_duration = list() - - ''' - start with a reasonable duration_tolerance, and increment duration_tolerance - until one of the Youtube results falls within the correct duration or - the duration_tolerance has reached the max_duration_tolerance - ''' - while len(possible_videos_by_duration) == 0: - possible_videos_by_duration = list(filter(lambda x: abs(x['seconds'] - meta_tags['duration']) <= duration_tolerance, videos)) - duration_tolerance += 1 - if duration_tolerance > max_duration_tolerance: - log.error("{0} by {1} was not found.\n".format(meta_tags['name'], meta_tags['artists'][0]['name'])) - return None - - result = possible_videos_by_duration[0] - - if result: - url = "http://youtube.com/watch?v=" + result['link'] - else: - url = None - +def generate_search_url(song): + """ Generate YouTube search URL for the given song. 
""" + # urllib.request.quote() encodes URL with special characters + song = urllib.request.quote(song) + # Special YouTube URL filter to search only for videos + url = 'https://www.youtube.com/results?sp=EgIQAQ%253D%253D&q={0}'.format(song) return url + + +def is_video(result): + # ensure result is not a channel + not_video = result.find('channel') is not None or \ + 'yt-lockup-channel' in result.parent.attrs['class'] or \ + 'yt-lockup-channel' in result.attrs['class'] + + # ensure result is not a mix/playlist + not_video = not_video or \ + 'yt-lockup-playlist' in result.parent.attrs['class'] + + # ensure video result is not an advertisement + not_video = not_video or \ + result.find('googleads') is not None + + video = not not_video + return video + + +def generate_youtube_url(raw_song, meta_tags): + url_fetch = GenerateYouTubeURL(raw_song, meta_tags) + if const.args.youtube_api_key: + url = url_fetch.api() + else: + url = url_fetch.scrape() + return url + + +class GenerateYouTubeURL: + def __init__(self, raw_song, meta_tags): + self.raw_song = raw_song + self.meta_tags = meta_tags + + def _best_match(self, videos): + """ Select the best matching video from a list of videos. """ + if const.args.manual: + log.info(self.raw_song) + log.info('0. Skip downloading this song.\n') + # fetch all video links on first page on YouTube + for i, v in enumerate(videos): + log.info(u'{0}. 
{1} {2} {3}'.format(i+1, v['title'], v['videotime'], + "http://youtube.com/watch?v="+v['link'])) + # let user select the song to download + result = internals.input_link(videos) + if result is None: + return None + else: + if not self.meta_tags: + # if the metadata could not be acquired, take the first result + # from Youtube because the proper song length is unknown + result = videos[0] + log.debug('Since no metadata found on Spotify, going with the first result') + else: + # filter out videos that do not have a similar length to the Spotify song + duration_tolerance = 10 + max_duration_tolerance = 20 + possible_videos_by_duration = [] + + # start with a reasonable duration_tolerance, and increment duration_tolerance + # until one of the Youtube results falls within the correct duration or + # the duration_tolerance has reached the max_duration_tolerance + while len(possible_videos_by_duration) == 0: + possible_videos_by_duration = list(filter(lambda x: abs(x['seconds'] - self.meta_tags['duration']) <= duration_tolerance, videos)) + duration_tolerance += 1 + if duration_tolerance > max_duration_tolerance: + log.error("{0} by {1} was not found.\n".format(self.meta_tags['name'], self.meta_tags['artists'][0]['name'])) + return None + + result = possible_videos_by_duration[0] + + if result: + url = "http://youtube.com/watch?v={0}".format(result['link']) + else: + url = None + + return url + + def scrape(self, tries_remaining=5): + """ Search and scrape YouTube to return a list of matching videos. """ + + # prevents an infinite loop but allows for a few retries + if tries_remaining == 0: + log.debug('No tries left. 
I quit.') + return + + if self.meta_tags is None: + song = self.raw_song + search_url = generate_search_url(song) + else: + song = internals.generate_songname(const.args.file_format, + self.meta_tags) + search_url = generate_search_url(song) + log.debug('Opening URL: {0}'.format(search_url)) + + item = urllib.request.urlopen(search_url).read() + items_parse = BeautifulSoup(item, "html.parser") + + videos = [] + for x in items_parse.find_all('div', {'class': 'yt-lockup-dismissable yt-uix-tile'}): + + if not is_video(x): + continue + + y = x.find('div', class_='yt-lockup-content') + link = y.find('a')['href'][-11:] + title = y.find('a')['title'] + + try: + videotime = x.find('span', class_="video-time").get_text() + except AttributeError: + log.debug('Could not find video duration on YouTube, retrying..') + return self.scrape(tries_remaining - 1) + + youtubedetails = {'link': link, 'title': title, 'videotime': videotime, + 'seconds': internals.get_sec(videotime)} + videos.append(youtubedetails) + if self.meta_tags is None: + break + + return self._best_match(videos) + + + def api(self): + """ Use YouTube API to search and return a list of matching videos. 
""" + + query = { 'part' : 'snippet', + 'maxResults' : 50, + 'type' : 'video' } + + if const.args.music_videos_only: + query['videoCategoryId'] = '10' + + if not self.meta_tags: + song = self.raw_song + query['q'] = song + else: + song = '{0} - {1}'.format(self.meta_tags['artists'][0]['name'], + self.meta_tags['name']) + query['q'] = song + log.debug('query: {0}'.format(query)) + + data = pafy.call_gdata('search', query) + data['items'] = list(filter(lambda x: x['id'].get('videoId') is not None, + data['items'])) + query_results = {'part': 'contentDetails,snippet,statistics', + 'maxResults': 50, + 'id': ','.join(i['id']['videoId'] for i in data['items'])} + log.debug('query_results: {0}'.format(query_results)) + + vdata = pafy.call_gdata('videos', query_results) + + videos = [] + for x in vdata['items']: + duration_s = pafy.playlist.parseISO8591(x['contentDetails']['duration']) + youtubedetails = {'link': x['id'], 'title': x['snippet']['title'], + 'videotime':internals.videotime_from_seconds(duration_s), + 'seconds': duration_s} + videos.append(youtubedetails) + if not self.meta_tags: + break + + return self._best_match(videos) diff --git a/requirements.txt b/requirements.txt new file mode 100755 index 0000000..c69e4d0 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,11 @@ +pathlib >= 1.0.1 +youtube_dl >= 2017.5.1 +pafy >= 0.5.3.1 +spotipy >= 2.4.4 +mutagen >= 1.37 +beautifulsoup4 >= 4.6.0 +unicode-slugify >= 0.1.3 +titlecase >= 0.10.0 +logzero >= 1.3.1 +lyricwikia >= 0.1.8 +PyYAML >= 3.12 diff --git a/setup.py b/setup.py index c7e5b14..00099ba 100644 --- a/setup.py +++ b/setup.py @@ -2,8 +2,7 @@ import re import ast from setuptools import setup -# Created from README.md using pandoc -with open('README.rst', 'r') as f: +with open('README.md', 'r') as f: long_description = f.read() @@ -17,7 +16,8 @@ with open('spotdl.py', 'r') as f: version = str(ast.literal_eval(_version_re.search(f.read()).group(1))) setup( - name='spotify-downloader', + # 'spotify-downloader' was 
already taken :/ + name='spotdl', py_modules=['spotdl'], # Tests are included automatically: # https://docs.python.org/3.6/distutils/sourcedist.html#specifying-the-files-to-distribute diff --git a/spotdl.py b/spotdl.py index 8d7528a..ac63a26 100755 --- a/spotdl.py +++ b/spotdl.py @@ -172,6 +172,7 @@ def download_single(raw_song, number=None): def main(): const.args = handle.get_arguments() internals.filter_path(const.args.folder) + youtube_tools.set_api_key() const.log = const.logzero.setup_logger(formatter=const.formatter, level=const.args.log_level) diff --git a/test/test_without_metadata.py b/test/test_without_metadata.py index 8027ea1..dc7bf33 100644 --- a/test/test_without_metadata.py +++ b/test/test_without_metadata.py @@ -13,6 +13,22 @@ loader.load_defaults() raw_song = "Tony's Videos VERY SHORT VIDEO 28.10.2016" +class TestYouTubeAPIKeys: + def test_custom(self): + expect_key = 'some_api_key' + const.args.youtube_api_key = expect_key + youtube_tools.set_api_key() + key = youtube_tools.pafy.g.api_key + assert key == expect_key + + def test_default(self): + expect_key = 'AIzaSyC6cEeKlxtOPybk9sEe5ksFN5sB-7wzYp0' + const.args.youtube_api_key = None + youtube_tools.set_api_key() + key = youtube_tools.pafy.g.api_key + assert key == expect_key + + def test_metadata(): expect_metadata = None global metadata @@ -22,10 +38,12 @@ def test_metadata(): class TestYouTubeURL: def test_only_music_category(self): - expect_url = 'http://youtube.com/watch?v=P11ou3CXKZo' + # YouTube keeps changing its results + expect_urls = ('http://youtube.com/watch?v=qOOcy2-tmbk', + 'http://youtube.com/watch?v=5USR1Omo7f0') const.args.music_videos_only = True url = youtube_tools.generate_youtube_url(raw_song, metadata) - assert url == expect_url + assert url in expect_urls def test_all_categories(self): expect_url = 'http://youtube.com/watch?v=qOOcy2-tmbk' @@ -49,16 +67,21 @@ class TestYouTubeURL: class TestYouTubeTitle: - def test_single_download(self): + def 
test_single_download_with_youtube_api(self): global content global title expect_title = "Tony's Videos VERY SHORT VIDEO 28.10.2016" + key = 'AIzaSyAnItl3udec-Q1d5bkjKJGL-RgrKO_vU90' + const.args.youtube_api_key = key + youtube_tools.set_api_key() content = youtube_tools.go_pafy(raw_song, metadata) title = youtube_tools.get_youtube_title(content) assert title == expect_title - def test_download_from_list(self): + def test_download_from_list_without_youtube_api(self): expect_title = "1. Tony's Videos VERY SHORT VIDEO 28.10.2016" + const.args.youtube_api_key = None + youtube_tools.set_api_key() content = youtube_tools.go_pafy(raw_song, metadata) title = youtube_tools.get_youtube_title(content, 1) assert title == expect_title