diff --git a/core/arguments.py b/core/arguments.py
new file mode 100644
index 0000000..a4d11d2
--- /dev/null
+++ b/core/arguments.py
@@ -0,0 +1,68 @@
+import argparse
+import os
+import sys
+
+from core.logger import log_leveller, _LOG_LEVELS_STR
+
+
+def get_arguments():
+    parser = argparse.ArgumentParser(
+        description='Download and convert songs from Spotify, Youtube etc.',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    group = parser.add_mutually_exclusive_group(required=True)
+
+    group.add_argument(
+        '-s', '--song', help='download song by spotify link or name')
+    group.add_argument(
+        '-l', '--list', help='download songs from a file')
+    group.add_argument(
+        '-p', '--playlist', help='load songs from playlist URL into .txt')
+    group.add_argument(
+        '-b', '--album', help='load songs from album URL into .txt')
+    group.add_argument(
+        '-u', '--username',
+        help="load songs from user's playlist into .txt")
+    parser.add_argument(
+        '-m', '--manual', default=False,
+        help='choose the song to download manually', action='store_true')
+    parser.add_argument(
+        '-nm', '--no-metadata', default=False,
+        help='do not embed metadata in songs', action='store_true')
+    parser.add_argument(
+        '-a', '--avconv', default=False,
+        help='Use avconv for conversion otherwise set defaults to ffmpeg',
+        action='store_true')
+    parser.add_argument(
+        '-f', '--folder', default=(os.path.join(sys.path[0], 'Music')),
+        help='path to folder where files will be stored in')
+    parser.add_argument(
+        '--overwrite', default='prompt',
+        help='change the overwrite policy',
+        choices={'prompt', 'force', 'skip'})
+    parser.add_argument(
+        '-i', '--input-ext', default='.m4a',
+        help='prefered input format .m4a or .webm (Opus)')
+    parser.add_argument(
+        '-o', '--output-ext', default='.mp3',
+        help='prefered output extension .mp3 or .m4a (AAC)')
+    parser.add_argument(
+        '-d', '--dry-run', default=False,
+        help='Show only track title and YouTube URL',
+        action='store_true')
+    parser.add_argument(
+        '-mo', '--music-videos-only', default=False,
+        help='Search only for music on Youtube',
+        action='store_true')
+    parser.add_argument(
+        '-ll', '--log-level', default='INFO',
+        choices=_LOG_LEVELS_STR,
+        type=str.upper,
+        help='set log verbosity')
+
+    parsed = parser.parse_args()
+    parsed.log_level = log_leveller(parsed.log_level)
+
+    return parsed
+
+
+parsed = get_arguments()
diff --git a/core/internals.py b/core/internals.py
index 37d71a1..7dacaab 100755
--- a/core/internals.py
+++ b/core/internals.py
@@ -1,11 +1,7 @@
-import argparse
-import spotipy.oauth2 as oauth2
-from urllib.request import quote
 from slugify import SLUG_OK, slugify
+from core.logger import log
 
-import sys
 import os
-from core.logger import log, log_leveller, _LOG_LEVELS_STR
 
 
 def input_link(links):
@@ -24,74 +20,14 @@ def input_link(links):
             log.warning('Choose a valid number!')
 
 
-def trim_song(file):
+def trim_song(text_file):
     """ Remove the first song from file. """
-    with open(file, 'r') as file_in:
+    with open(text_file, 'r') as file_in:
         data = file_in.read().splitlines(True)
-    with open(file, 'w') as file_out:
+    with open(text_file, 'w') as file_out:
         file_out.writelines(data[1:])
 
 
-def get_arguments():
-    parser = argparse.ArgumentParser(
-        description='Download and convert songs from Spotify, Youtube etc.',
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    group = parser.add_mutually_exclusive_group(required=True)
-
-    group.add_argument(
-        '-s', '--song', help='download song by spotify link or name')
-    group.add_argument(
-        '-l', '--list', help='download songs from a file')
-    group.add_argument(
-        '-p', '--playlist', help='load songs from playlist URL into .txt')
-    group.add_argument(
-        '-b', '--album', help='load songs from album URL into .txt')
-    group.add_argument(
-        '-u', '--username',
-        help="load songs from user's playlist into .txt")
-    parser.add_argument(
-        '-m', '--manual', default=False,
-        help='choose the song to download manually', action='store_true')
-    parser.add_argument(
-        '-nm', '--no-metadata', default=False,
-        help='do not embed metadata in songs', action='store_true')
-    parser.add_argument(
-        '-a', '--avconv', default=False,
-        help='Use avconv for conversion otherwise set defaults to ffmpeg',
-        action='store_true')
-    parser.add_argument(
-        '-f', '--folder', default=(os.path.join(sys.path[0], 'Music')),
-        help='path to folder where files will be stored in')
-    parser.add_argument(
-        '--overwrite', default='prompt',
-        help='change the overwrite policy',
-        choices={'prompt', 'force', 'skip'})
-    parser.add_argument(
-        '-i', '--input-ext', default='.m4a',
-        help='prefered input format .m4a or .webm (Opus)')
-    parser.add_argument(
-        '-o', '--output-ext', default='.mp3',
-        help='prefered output extension .mp3 or .m4a (AAC)')
-    parser.add_argument(
-        '-d', '--dry-run', default=False,
-        help='Show only track title and YouTube URL',
-        action='store_true')
-    parser.add_argument(
-        '-mo', '--music-videos-only', default=False,
-        help='Search only for music on Youtube',
-        action='store_true')
-    parser.add_argument(
-        '-ll', '--log-level', default='INFO',
-        choices=_LOG_LEVELS_STR,
-        type=str.upper,
-        help='set log verbosity')
-
-    parsed = parser.parse_args()
-    parsed.log_level = log_leveller(parsed.log_level)
-
-    return parsed
-
-
 def is_spotify(raw_song):
     """ Check if the input song is a Spotify link. """
     status = len(raw_song) == 22 and raw_song.replace(" ", "%20") == raw_song
@@ -107,6 +43,12 @@ def is_youtube(raw_song):
     return status
 
 
+def generate_songname(tags):
+    """ Generate a string of the format '[artist] - [song]' for the given spotify song. """
+    raw_song = u'{0} - {1}'.format(tags['artists'][0]['name'], tags['name'])
+    return raw_song
+
+
 def sanitize_title(title):
     """ Generate filename of the song to be downloaded. """
     title = title.replace(' ', '_')
diff --git a/core/youtube_tools.py b/core/youtube_tools.py
new file mode 100644
index 0000000..afe7d30
--- /dev/null
+++ b/core/youtube_tools.py
@@ -0,0 +1,142 @@
+import pafy
+
+from core import internals
+from core import arguments
+from core.logger import log
+
+import os
+import pprint
+
+args = arguments.parsed
+
+
+def go_pafy(raw_song, meta_tags=None):
+    """ Parse track from YouTube. """
+    if internals.is_youtube(raw_song):
+        track_info = pafy.new(raw_song)
+    else:
+        track_url = generate_youtube_url(raw_song, meta_tags)
+
+        if track_url:
+            track_info = pafy.new(track_url)
+        else:
+            track_info = None
+
+    return track_info
+
+
+def get_youtube_title(content, number=None):
+    """ Get the YouTube video's title. """
+    title = content.title
+    if number:
+        return '{0}. {1}'.format(number, title)
+    else:
+        return title
+
+
+def download_song(file_name, content):
+    """ Download the audio file from YouTube. """
+    if args.input_ext in (".webm", ".m4a"):
+        link = content.getbestaudio(preftype=args.input_ext[1:])
+    else:
+        return False
+
+    if link:
+        log.debug('Downloading from URL: ' + link.url)
+        filepath = '{0}{1}'.format(os.path.join(args.folder, file_name),
+                                   args.input_ext)
+        log.debug('Saving to: ' + filepath)
+        link.download(filepath=filepath)
+        return True
+    else:
+        return False
+
+
+def generate_youtube_url(raw_song, meta_tags, tries_remaining=5):
+    """ Search for the song on YouTube and generate a URL to its video. """
+    # prevents an infinite loop but allows for a few retries
+    if tries_remaining == 0:
+        log.debug('No tries left. I quit.')
+        return
+
+    query = { 'part' : 'snippet',
+              'maxResults' : 50,
+              'type' : 'video' }
+
+    if args.music_videos_only:
+        query['videoCategoryId'] = '10'
+
+    if not meta_tags:
+        song = raw_song
+        query['q'] = song
+    else:
+        song = internals.generate_songname(meta_tags)
+        query['q'] = song
+    log.debug('Query: {0}'.format(query))
+
+    data = pafy.call_gdata('search', query)
+    query2 = {'part': 'contentDetails,snippet,statistics',
+              'maxResults': 50,
+              'id': ','.join(i['id']['videoId'] for i in data['items'])}
+    log.debug('Query2: {0}'.format(query2))
+
+    vdata = pafy.call_gdata('videos', query2)
+
+    videos = []
+    for x in vdata['items']:
+        duration_s = pafy.playlist.parseISO8591(x['contentDetails']['duration'])
+        youtubedetails = {'link': x['id'], 'title': x['snippet']['title'],
+                          'videotime':internals.videotime_from_seconds(duration_s),
+                          'seconds': duration_s}
+        videos.append(youtubedetails)
+        if not meta_tags:
+            break
+
+    if not videos:
+        return None
+
+    log.debug(pprint.pformat(videos))
+
+    if args.manual:
+        log.info(song)
+        log.info('0. Skip downloading this song.\n')
+        # fetch all video links on first page on YouTube
+        for i, v in enumerate(videos):
+            log.info(u'{0}. {1} {2} {3}'.format(i+1, v['title'], v['videotime'],
+                  "http://youtube.com/watch?v="+v['link']))
+        # let user select the song to download
+        result = internals.input_link(videos)
+        if not result:
+            return None
+    else:
+        if not meta_tags:
+            # if the metadata could not be acquired, take the first result
+            # from Youtube because the proper song length is unknown
+            result = videos[0]
+            log.debug('Since no metadata found on Spotify, going with the first result')
+        else:
+            # filter out videos that do not have a similar length to the Spotify song
+            duration_tolerance = 10
+            max_duration_tolerance = 20
+            possible_videos_by_duration = list()
+
+            '''
+            start with a reasonable duration_tolerance, and increment duration_tolerance
+            until one of the Youtube results falls within the correct duration or
+            the duration_tolerance has reached the max_duration_tolerance
+            '''
+            while len(possible_videos_by_duration) == 0:
+                possible_videos_by_duration = list(filter(lambda x: abs(x['seconds'] - (int(meta_tags['duration_ms'])/1000)) <= duration_tolerance, videos))
+                duration_tolerance += 1
+                if duration_tolerance > max_duration_tolerance:
+                    log.error("{0} by {1} was not found.\n".format(meta_tags['name'], meta_tags['artists'][0]['name']))
+                    return None
+
+            result = possible_videos_by_duration[0]
+
+    if result:
+        url = "http://youtube.com/watch?v=" + result['link']
+    else:
+        url = None
+
+    return url
diff --git a/spotdl.py b/spotdl.py
index e69570f..905d243 100755
--- a/spotdl.py
+++ b/spotdl.py
@@ -6,6 +6,8 @@ from core import metadata
 from core import convert
 from core import internals
 from core import spotify_tools
+from core import youtube_tools
+from core import arguments
 from slugify import slugify
 import spotipy
 import pafy
@@ -18,162 +20,6 @@ import platform
 import pprint
 
 
-def generate_songname(tags):
-    """ Generate a string of the format '[artist] - [song]' for the given spotify song. """
-    raw_song = u'{0} - {1}'.format(tags['artists'][0]['name'], tags['name'])
-    return raw_song
-
-
-def is_video(result):
-    # ensure result is not a channel
-    not_video = result.find('channel') is not None or \
-                'yt-lockup-channel' in result.parent.attrs['class'] or \
-                'yt-lockup-channel' in result.attrs['class']
-
-    # ensure result is not a mix/playlist
-    not_video = not_video or \
-                'yt-lockup-playlist' in result.parent.attrs['class']
-
-    # ensure video result is not an advertisement
-    not_video = not_video or \
-                result.find('googleads') is not None
-
-    video = not not_video
-    return video
-
-
-def generate_youtube_url(raw_song, meta_tags, tries_remaining=5):
-    """ Search for the song on YouTube and generate a URL to its video. """
-    # prevents an infinite loop but allows for a few retries
-    if tries_remaining == 0:
-        log.debug('No tries left. I quit.')
-        return
-
-    query = {'part': 'snippet',
-             'maxResults': 50,
-             'type': 'video'}
-
-    if args.music_videos_only:
-        query['videoCategoryId'] = '10'
-
-    if not meta_tags:
-        song = raw_song
-        query['q'] = song
-    else:
-        song = generate_songname(meta_tags)
-        query['q'] = song
-    log.debug('Query: {0}'.format(query))
-
-    data = pafy.call_gdata('search', query)
-    query2 = {'part': 'contentDetails,snippet,statistics',
-              'maxResults': 50,
-              'id': ','.join(i['id']['videoId'] for i in data['items'])}
-    log.debug('Query2: {0}'.format(query2))
-
-    vdata = pafy.call_gdata('videos', query2)
-
-    videos = []
-    for x in vdata['items']:
-        duration_s = pafy.playlist.parseISO8591(x['contentDetails']['duration'])
-        youtubedetails = {'link': x['id'], 'title': x['snippet']['title'],
-                          'videotime':internals.videotime_from_seconds(duration_s),
-                          'seconds': duration_s}
-        videos.append(youtubedetails)
-        if not meta_tags:
-            break
-
-    if not videos:
-        return None
-
-    log.debug(pprint.pformat(videos))
-
-    if args.manual:
-        log.info(song)
-        log.info('0. Skip downloading this song.\n')
-        # fetch all video links on first page on YouTube
-        for i, v in enumerate(videos):
-            log.info(u'{0}. {1} {2} {3}'.format(i+1, v['title'], v['videotime'],
-                  "http://youtube.com/watch?v="+v['link']))
-        # let user select the song to download
-        result = internals.input_link(videos)
-        if not result:
-            return None
-    else:
-        if not meta_tags:
-            # if the metadata could not be acquired, take the first result
-            # from Youtube because the proper song length is unknown
-            result = videos[0]
-            log.debug('Since no metadata found on Spotify, going with the first result')
-        else:
-            # filter out videos that do not have a similar length to the Spotify song
-            duration_tolerance = 10
-            max_duration_tolerance = 20
-            possible_videos_by_duration = list()
-
-            '''
-            start with a reasonable duration_tolerance, and increment duration_tolerance
-            until one of the Youtube results falls within the correct duration or
-            the duration_tolerance has reached the max_duration_tolerance
-            '''
-            while len(possible_videos_by_duration) == 0:
-                possible_videos_by_duration = list(filter(lambda x: abs(x['seconds'] - (int(meta_tags['duration_ms'])/1000)) <= duration_tolerance, videos))
-                duration_tolerance += 1
-                if duration_tolerance > max_duration_tolerance:
-                    log.error("{0} by {1} was not found.\n".format(meta_tags['name'],meta_tags['artists'][0]['name']))
-                    return None
-
-            result = possible_videos_by_duration[0]
-
-    if result:
-        url = "http://youtube.com/watch?v=" + result['link']
-    else:
-        url = None
-
-    return url
-
-
-def go_pafy(raw_song, meta_tags=None):
-    """ Parse track from YouTube. """
-    if internals.is_youtube(raw_song):
-        track_info = pafy.new(raw_song)
-    else:
-        track_url = generate_youtube_url(raw_song, meta_tags)
-
-        if track_url:
-            track_info = pafy.new(track_url)
-        else:
-            track_info = None
-
-    return track_info
-
-
-def get_youtube_title(content, number=None):
-    """ Get the YouTube video's title. """
-    title = content.title
-    if number:
-        return '{0}. {1}'.format(number, title)
-    else:
-        return title
-
-
-def download_song(file_name, content):
-    """ Download the audio file from YouTube. """
-    if args.input_ext in (".webm", ".m4a"):
-        link = content.getbestaudio(preftype=args.input_ext[1:])
-    else:
-        return False
-
-    if link:
-        log.debug('Downloading from URL: ' + link.url)
-        filepath = '{0}{1}'.format(os.path.join(args.folder, file_name),
-                                   args.input_ext)
-        log.debug('Saving to: ' + filepath)
-        link.download(filepath=filepath)
-        return True
-    else:
-        return False
-
-
 def check_exists(music_file, raw_song, meta_tags):
     """ Check if the input song already exists in the given folder. """
     log.debug('Cleaning any temp files and checking '
@@ -287,12 +133,12 @@ def grab_single(raw_song, number=None):
     """ Logic behind downloading a song. """
     if internals.is_youtube(raw_song):
         log.debug('Input song is a YouTube URL')
-        content = go_pafy(raw_song, meta_tags=None)
+        content = youtube_tools.go_pafy(raw_song, meta_tags=None)
         raw_song = slugify(content.title).replace('-', ' ')
         meta_tags = spotify_tools.generate_metadata(raw_song)
     else:
         meta_tags = spotify_tools.generate_metadata(raw_song)
-        content = go_pafy(raw_song, meta_tags)
+        content = youtube_tools.go_pafy(raw_song, meta_tags)
 
     if not content:
         log.debug('Found no matching video')
@@ -300,13 +146,13 @@ def grab_single(raw_song, number=None):
 
     # "[number]. [artist] - [song]" if downloading from list
     # otherwise "[artist] - [song]"
-    youtube_title = get_youtube_title(content, number)
+    youtube_title = youtube_tools.get_youtube_title(content, number)
     log.info('{} ({})'.format(youtube_title, content.watchv_url))
     # generate file name of the song to download
     songname = content.title
 
     if meta_tags:
-        refined_songname = generate_songname(meta_tags)
+        refined_songname = internals.generate_songname(meta_tags)
         log.debug('Refining songname from "{0}" to "{1}"'.format(songname, refined_songname))
         if not refined_songname == ' - ':
             songname = refined_songname
@@ -317,7 +163,7 @@ def grab_single(raw_song, number=None):
     file_name = internals.sanitize_title(songname)
 
     if not check_exists(file_name, raw_song, meta_tags):
-        if download_song(file_name, content):
+        if youtube_tools.download_song(file_name, content):
             input_song = file_name + args.input_ext
             output_song = file_name + args.output_ext
             print('')
@@ -350,7 +196,7 @@ token = spotify_tools.generate_token()
 spotify = spotipy.Spotify(auth=token)
 
 if __name__ == '__main__':
-    args = internals.get_arguments()
+    args = arguments.parsed
     internals.filter_path(args.folder)
 
     logger.log = logger.logzero.setup_logger(formatter=logger.formatter,
diff --git a/test/test_simple.py b/test/test_simple.py
index 00df5a8..8100ed0 100644
--- a/test/test_simple.py
+++ b/test/test_simple.py
@@ -7,6 +7,8 @@ import os
 
 raw_song = "Tony's Videos VERY SHORT VIDEO 28.10.2016"
 
+test_args = '-f test -ll debug -m --overwrite skip'.split()
+
 class TestArgs:
     manual = False
     input_ext = '.m4a'
@@ -16,8 +18,7 @@ class TestArgs:
     overwrite = 'skip'
     music_videos_only = False
 
-test_args = TestArgs()
-setattr(spotdl, "args", test_args)
+setattr(spotdl, "args", TestArgs())
 
 spotdl.log = logger.logzero.setup_logger(formatter=logger.formatter,
                                          level=spotdl.args.log_level)