#!/usr/bin/env python
# -*- coding: UTF-8 -*-

"""Download songs named by a Spotify link or a search query.

The audio is fetched from the first suitable YouTube search result,
optionally converted to another container with avconv or FFmpeg, and tagged
with metadata fetched from Spotify.

The functions below rely on three module-level globals that are created in
the ``__main__`` block at the bottom of the file: ``args`` (parsed command
line options), ``spotify`` (an authenticated ``spotipy.Spotify`` client) and,
on Python 3, ``raw_input`` (an alias of ``input``).
"""

import os
import subprocess
import sys

from bs4 import BeautifulSoup
import pafy
from slugify import slugify
import spotipy
from titlecase import titlecase

from core import metadata
from core import misc

# urllib2 is urllib.request in python3
try:
    import urllib2
except ImportError:
    import urllib.request as urllib2


def generate_songname(raw_song):
    """Decode a Spotify link to "[artist] - [song]".

    Input that is not a Spotify link is passed through unchanged. In both
    cases the result goes through ``misc.fix_encoding`` before it is returned.
    """
    if misc.is_spotify(raw_song):
        tags = generate_metadata(raw_song)
        raw_song = tags['artists'][0]['name'] + ' - ' + tags['name']
    return misc.fix_encoding(raw_song)


def generate_metadata(raw_song):
    """Fetch a song's metadata from Spotify.

    A Spotify link is looked up directly; anything else is used as a search
    query and the first hit is taken. The track dictionary returned by
    Spotify is extended with ``genre``, ``release_date``, ``copyright``,
    ``publisher`` and ``total_tracks`` before it is returned.

    Raises IndexError when the search has no results or when the album lists
    no copyrights.
    """
    if misc.is_spotify(raw_song):
        # fetch track information directly if it is spotify link
        meta_tags = spotify.track(raw_song)
    else:
        # otherwise search on spotify and fetch information from first result
        meta_tags = spotify.search(raw_song, limit=1)['tracks']['items'][0]

    artist = spotify.artist(meta_tags['artists'][0]['id'])
    album = spotify.album(meta_tags['album']['id'])

    # an artist may have no genres listed at all
    try:
        meta_tags[u'genre'] = titlecase(artist['genres'][0])
    except IndexError:
        meta_tags[u'genre'] = None

    meta_tags[u'release_date'] = album['release_date']
    meta_tags[u'copyright'] = album['copyrights'][0]['text']
    meta_tags[u'publisher'] = album['label']
    meta_tags[u'total_tracks'] = album['tracks']['total']
    return meta_tags


def _is_advertisement(href):
    """Return True when a result link points to a channel or a Google ad."""
    return 'channel' in href or 'googleads' in href


def generate_YouTube_URL(raw_song):
    """Return the YouTube link to download for ``raw_song``.

    With ``args.manual`` the user picks one of the results on the first
    search page; otherwise the first result that is not an advertisement is
    used. Returns None when the user skips the song or when no usable result
    is found.
    """
    # decode spotify http link to "[artist] - [song]"
    song = generate_songname(raw_song)
    # generate direct search YouTube URL
    searchURL = misc.generate_search_URL(song)
    item = urllib2.urlopen(searchURL).read()
    items_parse = BeautifulSoup(item, "html.parser")

    if args.manual:
        links = []
        print(song)
        print('')
        print('0. Skip downloading this song')
        check = 1
        # fetch all video links on first page on YouTube
        for heading in items_parse.find_all('h3', {'class': 'yt-lockup-title'}):
            anchor = heading.find('a')
            href = anchor.get('href') if anchor is not None else None
            # confirm the video result is not an advertisement; the test has
            # to look at the link text, because find() on a tag searches for
            # child tags and never returns -1
            if not href or _is_advertisement(href):
                continue
            print(str(check) + '. ' + heading.get_text())
            links.append(href)
            check += 1
        print('')
        # let user select the song to download
        result = misc.input_link(links)
        if result is None:
            return None
    else:
        # get video link of the first YouTube result that is not an
        # advertisement
        result = None
        for tile in items_parse.find_all(attrs={'class': 'yt-uix-tile-link'}):
            href = tile.get('href')
            if href and not _is_advertisement(href):
                result = href
                break
        if result is None:
            print('No matching YouTube video found.')
            return None

    full_link = "youtube.com" + result
    return full_link


def go_pafy(raw_song):
    """Parse the YouTube video for ``raw_song``; return None if there is none."""
    # video link of the video to extract audio from
    trackURL = generate_YouTube_URL(raw_song)
    if trackURL is None:
        return None
    # parse the YouTube video
    return pafy.new(trackURL)


def get_YouTube_title(content, number):
    """Return the video title, prefixed with "[number]. " when one is given."""
    title = content.title
    if number is None:
        return title
    return str(number) + '. ' + title


def feed_playlist(username):
    """Write the tracks of one of the user's playlists to a text file.

    Used by the -u option: lists the playlists of ``username``, lets the user
    pick one and passes every page of its tracks to ``misc.feed_tracks``.
    """
    # fetch all user playlists
    playlists = spotify.user_playlists(username)
    links = []
    # iterate over user playlists
    for check, playlist in enumerate(playlists['items'], 1):
        print(str(check) + '. ' + misc.fix_encoding(playlist['name']) +
              ' (' + str(playlist['tracks']['total']) + ' tracks)')
        links.append(playlist)
    print('')

    # let user select playlist
    playlist = misc.input_link(links)
    if playlist is None:
        # nothing was selected, so there is nothing to feed
        return

    # fetch detailed information for playlist
    results = spotify.user_playlist(playlist['owner']['id'], playlist['id'],
                                    fields="tracks,next")
    print('')
    # slugify removes any special characters
    text_file = slugify(playlist['name'], ok='-_()[]{}') + '.txt'
    print('Feeding ' + str(playlist['tracks']['total']) +
          ' tracks to ' + text_file)

    tracks = results['tracks']
    # write tracks to file
    misc.feed_tracks(text_file, tracks)
    # check if there are more pages
    # 1 page = 50 results
    while tracks['next']:
        tracks = spotify.next(tracks)
        misc.feed_tracks(text_file, tracks)


def download_song(content):
    """Download the best available audio stream into the Music folder.

    A .webm stream is requested when ``args.input_ext`` is '.webm', an .m4a
    stream otherwise. Nothing is downloaded when no such stream is offered.
    """
    music_file = misc.generate_filename(content.title)
    preftype = 'webm' if args.input_ext == '.webm' else 'm4a'
    link = content.getbestaudio(preftype=preftype)
    if link is not None:
        link.download(filepath='Music/' + music_file + args.input_ext)


def convert_song(music_file):
    """Convert the song from input_ext to output_ext."""
    # skip conversion if input_ext == output_ext
    if not args.input_ext == args.output_ext:
        print('Converting ' + music_file + args.input_ext +
              ' to ' + args.output_ext[1:])
        if args.ffmpeg:
            convert_with_FFmpeg(music_file)
        else:
            convert_with_libav(music_file)


def convert_with_libav(music_file):
    """Convert the song with avconv and delete the input file afterwards."""
    # different path for windows
    if os.name == 'nt':
        avconv_path = 'Scripts\\avconv.exe'
    else:
        avconv_path = 'avconv'

    level = 'debug' if args.verbose else '0'

    command = [avconv_path,
               '-loglevel', level,
               '-i', 'Music/' + music_file + args.input_ext,
               '-ab', '192k',
               'Music/' + music_file + args.output_ext]
    subprocess.call(command)
    os.remove('Music/' + music_file + args.input_ext)


def convert_with_FFmpeg(music_file):
    """Convert the song with FFmpeg and delete the input file afterwards.

    Supported conversions are .m4a to .mp3/.webm and .webm to .mp3/.m4a.
    Any other combination returns without converting or deleting anything.
    """
    # What are the differences and similarities between ffmpeg, libav, and avconv?
    # https://stackoverflow.com/questions/9477115
    # ffmeg encoders high to lower quality
    # libopus > libvorbis >= libfdk_aac > aac > libmp3lame
    # libfdk_aac due to copyrights needs to be compiled by end user
    # on MacOS brew install ffmpeg --with-fdk-aac will do just that. Other OS?
    # https://trac.ffmpeg.org/wiki/Encode/AAC

    # The command is assembled as a list of arguments instead of one string
    # that is split on spaces: splitting yields empty arguments wherever two
    # spaces meet and tears apart file names that contain a space.
    if os.name == "nt":
        command = ['Scripts//ffmpeg.exe']
    else:
        command = ['ffmpeg']
    command.append('-y')

    if not args.verbose:
        command += ['-hide_banner', '-nostats', '-v', 'panic']

    if args.input_ext == '.m4a':
        if args.output_ext == '.mp3':
            ffmpeg_params = ['-codec:v', 'copy',
                             '-codec:a', 'libmp3lame', '-q:a', '2']
        elif args.output_ext == '.webm':
            ffmpeg_params = ['-c:a', 'libopus', '-vbr', 'on',
                             '-b:a', '192k', '-vn']
        else:
            return
    elif args.input_ext == '.webm':
        if args.output_ext == '.mp3':
            ffmpeg_params = ['-ab', '192k', '-ar', '44100', '-vn']
        elif args.output_ext == '.m4a':
            ffmpeg_params = ['-cutoff', '20000', '-c:a', 'libfdk_aac',
                             '-b:a', '256k', '-vn']
        else:
            return
    else:
        print('Unknown formats. Unable to convert. ' +
              args.input_ext + ' ' + args.output_ext)
        return

    command += ['-i', 'Music/' + music_file + args.input_ext]
    command += ffmpeg_params
    command.append('Music/' + music_file + args.output_ext)

    subprocess.call(command)
    os.remove('Music/' + music_file + args.input_ext)


def check_exists(music_file, raw_song, islist):
    """Check if the input song already exists in the Music folder.

    Leftover ".temp" files are deleted along the way. Returns True when the
    existing file is kept and the download should be skipped, False when the
    existing file was removed and the song should be downloaded again, and
    None when no file with a matching name was found.
    """
    name_prefix = misc.generate_filename(music_file)
    for existing in os.listdir("Music"):
        if existing.endswith(".temp"):
            os.remove("Music/" + existing)
            continue
        # check if any file with similar name is already present in Music/
        if not existing.startswith(name_prefix):
            continue

        # if the already downloaded song came from a spotify link but does
        # not carry the correct metadata, remove it and download again
        # without prompt; metadata is fetched only in that case because it
        # costs several requests to Spotify
        if misc.is_spotify(raw_song):
            already_tagged = metadata.compare(existing,
                                              generate_metadata(raw_song))
            if not already_tagged:
                os.remove("Music/" + existing)
                return False

        # do not prompt and skip the current song
        # if already downloaded when using list
        if islist:
            return True

        # if downloading only single song, prompt to re-download
        prompt = raw_input('Song with same name has already been downloaded. Re-download? (y/n): ').lower()
        if prompt == "y":
            os.remove("Music/" + existing)
            return False
        return True


def grab_list(file):
    """Download every song listed, one per line, in the text file ``file``.

    After each song ``misc.trim_song`` is called on the file. A song that
    fails with a network-type error is moved to the end of both the file and
    the in-memory queue so that it is retried after the other songs.
    """
    global spotify

    with open(file, 'r') as listed:
        lines = listed.read().splitlines()
    # ignore blank lines in file (if any)
    lines = [line for line in lines if line]

    print('Total songs in list = ' + str(len(lines)) + ' songs')
    print('')
    # nth input song
    number = 1
    # NOTE: failed songs are appended to ``lines`` while it is being iterated;
    # that is what makes the loop come back to them later.
    for raw_song in lines:
        try:
            grab_single(raw_song, number=number)
        # token expires after 1 hour
        except spotipy.oauth2.SpotifyOauthError:
            # refresh token when it expires
            token = misc.generate_token()
            spotify = spotipy.Spotify(auth=token)
            grab_single(raw_song, number=number)
        # detect network problems
        except (urllib2.URLError, TypeError, IOError):
            lines.append(raw_song)
            # remove the downloaded song from .txt
            misc.trim_song(file)
            # and append it to the last line in .txt; the newline keeps the
            # next appended entry from fusing with this one
            with open(file, 'a') as myfile:
                myfile.write(raw_song + '\n')
            print('Failed to download song. Will retry after other songs.')
            continue
        except KeyboardInterrupt:
            misc.grace_quit()
        finally:
            print('')

        misc.trim_song(file)
        number += 1


def grab_single(raw_song, number=None):
    """Download, convert and tag a single song.

    ``number`` is the position of the song when downloading from a list and
    None when downloading one song on its own.
    """
    # check if song is being downloaded from list
    islist = bool(number)

    content = go_pafy(raw_song)
    if content is None:
        return

    # print "[number]. [artist] - [song]" if downloading from list
    # otherwise print "[artist] - [song]"
    print(get_YouTube_title(content, number))

    # generate file name of the song to download
    music_file = misc.generate_filename(content.title)
    if not check_exists(music_file, raw_song, islist=islist):
        download_song(content)
        print('')
        convert_song(music_file)
        # only ask Spotify for metadata when it is going to be embedded
        if not args.no_metadata:
            meta_tags = generate_metadata(raw_song)
            metadata.embed(music_file, meta_tags, args.output_ext)


if __name__ == '__main__':

    # python 3 compatibility
    if sys.version_info > (3, 0):
        raw_input = input

    os.chdir(sys.path[0])
    if not os.path.exists("Music"):
        os.makedirs("Music")

    # token is mandatory when using Spotify's API
    # https://developer.spotify.com/news-stories/2017/01/27/removing-unauthenticated-calls-to-the-web-api/
    token = misc.generate_token()
    spotify = spotipy.Spotify(auth=token)

    # set up arguments
    args = misc.get_arguments()

    if args.song:
        grab_single(raw_song=args.song)
    elif args.list:
        grab_list(file=args.list)
    elif args.username:
        feed_playlist(username=args.username)