#!/usr/bin/env python3
# -*- coding: UTF-8 -*-

from core import logger
from core import metadata
from core import convert
from core import internals
from core import spotify_tools
from slugify import slugify
import spotipy
import pafy
import urllib.request
import os
import sys
import time
import platform
import pprint


def generate_songname(tags):
    """
    Generate a string of the format '[artist] - [song]' for the
    given spotify song.

    `tags` must provide tags['artists'][0]['name'] and tags['name'].
    """
    return u'{0} - {1}'.format(tags['artists'][0]['name'], tags['name'])


def is_video(result):
    """
    Return True when a search result looks like a plain video, i.e. it
    is not a channel, a mix/playlist or an advertisement.

    NOTE(review): `result` presumably is a BeautifulSoup-style tag
    (it needs .find(), .attrs and .parent) -- confirm. Nothing in this
    file calls this helper.
    """
    # ensure result is not a channel
    not_video = result.find('channel') is not None or \
        'yt-lockup-channel' in result.parent.attrs['class'] or \
        'yt-lockup-channel' in result.attrs['class']

    # ensure result is not a mix/playlist
    not_video = not_video or \
        'yt-lockup-playlist' in result.parent.attrs['class']

    # ensure video result is not an advertisement
    not_video = not_video or \
        result.find('googleads') is not None

    return not not_video


def generate_youtube_url(raw_song, meta_tags, tries_remaining=5):
    """
    Search for the song on YouTube and generate a URL to its video.

    raw_song        -- search text used when no metadata is available
    meta_tags       -- Spotify metadata dict, or a falsy value when
                       none could be acquired
    tries_remaining -- retry budget; the search is abandoned at 0

    Returns the watch URL as a string, or None when nothing suitable
    was found (or the user chose to skip in manual mode).
    """
    # prevents an infinite loop but allows for a few retries
    if tries_remaining == 0:
        log.debug('No tries left. I quit.')
        return

    query = {'part': 'snippet',
             'maxResults': 50,
             'type': 'video'}

    if args.music_videos_only:
        query['videoCategoryId'] = '10'

    # prefer the "[artist] - [song]" form whenever metadata is known
    song = generate_songname(meta_tags) if meta_tags else raw_song
    query['q'] = song
    log.debug('Query: {0}'.format(query))

    data = pafy.call_gdata('search', query)
    query2 = {'part': 'contentDetails,snippet,statistics',
              'maxResults': 50,
              'id': ','.join(i['id']['videoId'] for i in data['items'])}
    log.debug('Query2: {0}'.format(query2))

    vdata = pafy.call_gdata('videos', query2)

    videos = []
    for x in vdata['items']:
        duration_s = pafy.playlist.parseISO8591(x['contentDetails']['duration'])
        videos.append({'link': x['id'],
                       'title': x['snippet']['title'],
                       'videotime': internals.videotime_from_seconds(duration_s),
                       'seconds': duration_s})
        # without metadata only the first result is ever considered
        if not meta_tags:
            break

    if not videos:
        return None

    log.debug(pprint.pformat(videos))

    if args.manual:
        log.info(song)
        log.info('0. Skip downloading this song.\n')
        # fetch all video links on first page on YouTube
        for i, v in enumerate(videos):
            log.info(u'{0}. {1} {2} {3}'.format(i+1, v['title'], v['videotime'],
                                                "http://youtube.com/watch?v="+v['link']))
        # let user select the song to download
        result = internals.input_link(videos)
        if not result:
            return None
    elif not meta_tags:
        # if the metadata could not be acquired, take the first result
        # from Youtube because the proper song length is unknown
        result = videos[0]
        log.debug('Since no metadata found on Spotify, going with the first result')
    else:
        # filter out videos that do not have a similar length to the Spotify song
        duration_tolerance = 10
        max_duration_tolerance = 20
        song_seconds = int(meta_tags['duration_ms']) / 1000

        # start with a reasonable duration_tolerance, and widen it one
        # second at a time until one of the Youtube results falls within
        # the correct duration or max_duration_tolerance has been tried.
        # The match check happens before giving up, so a video that only
        # fits at exactly max_duration_tolerance is still accepted.
        for tolerance in range(duration_tolerance, max_duration_tolerance + 1):
            possible_videos_by_duration = [
                video for video in videos
                if abs(video['seconds'] - song_seconds) <= tolerance]
            if possible_videos_by_duration:
                break
        else:
            log.error("{0} by {1} was not found.\n".format(meta_tags['name'],
                                                           meta_tags['artists'][0]['name']))
            return None

        result = possible_videos_by_duration[0]

    if result:
        url = "http://youtube.com/watch?v=" + result['link']
    else:
        url = None

    return url


def go_pafy(raw_song, meta_tags=None):
    """
    Parse track from YouTube.

    Returns the object created by pafy.new() for the track, or None
    when no YouTube URL could be generated for it.
    """
    if internals.is_youtube(raw_song):
        return pafy.new(raw_song)

    track_url = generate_youtube_url(raw_song, meta_tags)
    if not track_url:
        return None
    return pafy.new(track_url)


def get_youtube_title(content, number=None):
    """
    Get the YouTube video's title, prefixed with '[number]. ' when a
    (truthy) number is given.
    """
    title = content.title
    if number:
        return '{0}. {1}'.format(number, title)
    return title


def download_song(file_name, content):
    """
    Download the audio file from YouTube.

    The stream is saved as <args.folder>/<file_name><args.input_ext>.
    Returns True when a stream was downloaded, False when the input
    extension is unsupported or no audio stream is available.
    """
    if args.input_ext not in (".webm", ".m4a"):
        return False

    # preftype wants the extension without its leading dot
    link = content.getbestaudio(preftype=args.input_ext[1:])
    if not link:
        return False

    log.debug('Downloading from URL: ' + link.url)
    filepath = '{0}{1}'.format(os.path.join(args.folder, file_name), args.input_ext)
    log.debug('Saving to: ' + filepath)
    link.download(filepath=filepath)
    return True


def check_exists(music_file, raw_song, meta_tags):
    """
    Check if the input song already exists in the given folder.

    Every '*.temp' file met while scanning args.folder is deleted.
    Returns True when an existing file should be kept (the download is
    skipped) and False when the song has to be downloaded; in the
    latter case the stale file, if any, has already been removed.
    """
    log.debug('Cleaning any temp files and checking '
              'if "{}" already exists'.format(music_file))
    # the sanitized name does not depend on the file being inspected
    file_name = internals.sanitize_title(music_file)

    for song in os.listdir(args.folder):
        song_path = os.path.join(args.folder, song)

        if song.endswith('.temp'):
            os.remove(song_path)
            continue

        # check if any song with similar name is already present in the given folder
        if not song.startswith(file_name):
            continue

        log.debug('Found an already existing song: "{}"'.format(song))
        if internals.is_spotify(raw_song):
            # check if the already downloaded song has correct metadata
            # if not, remove it and download again without prompt
            already_tagged = metadata.compare(song_path, meta_tags)
            # the value must be formatted into the message: passing it as a
            # logging argument to a '{}' template triggers a formatting error
            log.debug('Checking if it is already tagged correctly? {}'.format(already_tagged))
            if not already_tagged:
                os.remove(song_path)
                return False

        log.warning('"{}" already exists'.format(song))
        if args.overwrite == 'prompt':
            log.info('"{}" has already been downloaded. '
                     'Re-download? (y/N): '.format(song))
            prompt = input('> ')
            if prompt.lower() == 'y':
                os.remove(song_path)
                return False
            return True
        elif args.overwrite == 'force':
            os.remove(song_path)
            log.info('Overwriting "{}"'.format(song))
            return False
        elif args.overwrite == 'skip':
            log.info('Skipping "{}"'.format(song))
            return True
        # any other overwrite mode: keep scanning the folder

    return False


def grab_list(text_file):
    """
    Download all songs from the list.

    Each non-blank line of text_file is handed to grab_single(). After
    a successful download internals.trim_song(text_file) is called;
    songs failing with a network-like error are moved to the end of
    both the in-memory list and the file so they get retried later.
    """
    global spotify

    with open(text_file, 'r') as listed:
        # ignore every blank line in text_file, not just the first one
        lines = [line for line in listed.read().splitlines() if line.strip()]
    # NOTE(review): blank lines are skipped here but stay in the file, while
    # trim_song() presumably drops the file's first line -- confirm they
    # cannot get out of step.

    log.info(u'Preparing to download {} songs'.format(len(lines)))
    number = 1

    # `lines` grows while being iterated: failed songs are appended below
    # on purpose so that this same loop retries them after the others
    for raw_song in lines:
        print('')
        try:
            grab_single(raw_song, number=number)
        # token expires after 1 hour
        except spotipy.client.SpotifyException:
            # refresh token when it expires
            log.debug('Token expired, generating new one and authorizing')
            new_token = spotify_tools.generate_token()
            spotify = spotipy.Spotify(auth=new_token)
            grab_single(raw_song, number=number)
        # detect network problems
        except (urllib.request.URLError, TypeError, IOError):
            lines.append(raw_song)
            # remove the downloaded song from file
            internals.trim_song(text_file)
            # and append it at the end of file
            with open(text_file, 'a') as myfile:
                myfile.write(raw_song + '\n')
            log.warning('Failed to download song. Will retry after other songs\n')
            # wait 0.5 sec to avoid infinite looping
            time.sleep(0.5)
            continue

        log.debug('Removing downloaded song from text file')
        internals.trim_song(text_file)
        number += 1


def grab_playlist(playlist):
    """
    Write the tracks of a playlist via spotify_tools.write_playlist().

    `playlist` is split on '/' when it contains one (a single trailing
    '/' is dropped first), otherwise on ':'. The third-from-last piece
    is taken as the username and the last piece as the playlist id.
    Exits with status 10 when there are too few pieces and with status
    11 when Spotify cannot find the playlist.
    """
    if '/' in playlist:
        if playlist.endswith('/'):
            playlist = playlist[:-1]
        splits = playlist.split('/')
    else:
        splits = playlist.split(':')

    try:
        username = splits[-3]
    except IndexError:
        # Wrong format, in either case
        log.error('The provided playlist URL is not in a recognized format!')
        sys.exit(10)
    playlist_id = splits[-1]

    try:
        spotify_tools.write_playlist(username, playlist_id)
    except spotipy.client.SpotifyException:
        log.error('Unable to find playlist')
        log.info('Make sure the playlist is set to publicly visible and then try again')
        sys.exit(11)


def grab_single(raw_song, number=None):
    """
    Logic behind downloading a song.

    Resolves metadata and the YouTube video for raw_song, then (unless
    args.dry_run is set or check_exists() says to keep an existing
    file) downloads the audio, converts it and embeds the metadata.

    number -- position in the list being downloaded; only used to
              prefix the logged title
    """
    if internals.is_youtube(raw_song):
        log.debug('Input song is a YouTube URL')
        content = go_pafy(raw_song, meta_tags=None)
        # look the metadata up using the video title instead of the URL
        raw_song = slugify(content.title).replace('-', ' ')
        meta_tags = spotify_tools.generate_metadata(raw_song)
    else:
        meta_tags = spotify_tools.generate_metadata(raw_song)
        content = go_pafy(raw_song, meta_tags)

    if not content:
        log.debug('Found no matching video')
        return

    # "[number]. [artist] - [song]" if downloading from list
    # otherwise "[artist] - [song]"
    youtube_title = get_youtube_title(content, number)
    log.info('{} ({})'.format(youtube_title, content.watchv_url))

    # generate file name of the song to download
    songname = content.title

    if meta_tags:
        refined_songname = generate_songname(meta_tags)
        log.debug('Refining songname from "{0}" to "{1}"'.format(songname, refined_songname))
        # ' - ' means both artist and song name came back empty
        if not refined_songname == ' - ':
            songname = refined_songname

    if args.dry_run:
        return

    file_name = internals.sanitize_title(songname)

    if check_exists(file_name, raw_song, meta_tags):
        return

    if not download_song(file_name, content):
        log.error('No audio streams available')
        return

    input_song = file_name + args.input_ext
    output_song = file_name + args.output_ext
    print('')

    try:
        convert.song(input_song, output_song, args.folder,
                     avconv=args.avconv)
    except FileNotFoundError:
        encoder = 'avconv' if args.avconv else 'ffmpeg'
        log.warning('Could not find {0}, skipping conversion'.format(encoder))
        # keep the downloaded file as the final output from now on
        args.output_ext = args.input_ext
        output_song = file_name + args.output_ext

    if not args.input_ext == args.output_ext:
        os.remove(os.path.join(args.folder, input_song))

    if not args.no_metadata:
        # test the song's tags, not the always-truthy `metadata` module
        if meta_tags:
            metadata.embed(os.path.join(args.folder, output_song), meta_tags)
        else:
            log.warning('Could not find metadata')


# token is mandatory when using Spotify's API
# https://developer.spotify.com/news-stories/2017/01/27/removing-unauthenticated-calls-to-the-web-api/
token = spotify_tools.generate_token()
spotify = spotipy.Spotify(auth=token)

if __name__ == '__main__':
    args = internals.get_arguments()
    internals.filter_path(args.folder)

    logger.log = logger.logzero.setup_logger(formatter=logger.formatter,
                                             level=args.log_level)
    log = logger.log
    log.debug('Python version: {}'.format(sys.version))
    log.debug('Platform: {}'.format(platform.platform()))
    log.debug(pprint.pformat(args.__dict__))

    try:
        if args.song:
            grab_single(raw_song=args.song)
        elif args.list:
            grab_list(text_file=args.list)
        elif args.playlist:
            grab_playlist(playlist=args.playlist)
        elif args.album:
            spotify_tools.grab_album(album=args.album)
        elif args.username:
            spotify_tools.feed_playlist(username=args.username)

        # Actually we don't necessarily need this, but yeah...
        # Explicit is better than implicit!
        sys.exit(0)

    except KeyboardInterrupt as e:
        log.exception(e)
        sys.exit(3)