Files
spotify-downloader/spotdl.py
2018-01-09 17:12:26 +05:30

382 lines
13 KiB
Python
Executable File

#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
from core import logger
from core import metadata
from core import convert
from core import internals
from core import spotify_tools
from slugify import slugify
import spotipy
import pafy
import urllib.request
import os
import sys
import time
import sys
import platform
import pprint
def generate_songname(tags):
    """Build the display name '[artist] - [song]' from Spotify track metadata.

    Only the first entry of ``tags['artists']`` contributes to the name.
    """
    lead_artist = tags['artists'][0]['name']
    track_title = tags['name']
    return u'{0} - {1}'.format(lead_artist, track_title)
def is_video(result):
    """Tell whether a search-result node is a plain video.

    Returns False for channels, mixes/playlists and advertisements, and
    True for everything else.
    """
    # channels: either a nested 'channel' node or a channel lockup class
    if result.find('channel') is not None:
        return False

    parent_classes = result.parent.attrs['class']
    if 'yt-lockup-channel' in parent_classes:
        return False
    if 'yt-lockup-channel' in result.attrs['class']:
        return False

    # mixes / playlists
    if 'yt-lockup-playlist' in parent_classes:
        return False

    # advertisements
    return result.find('googleads') is None
def _pick_video_by_duration(videos, target_seconds, tolerance=10, max_tolerance=20):
    """Return the first video whose length is close enough to target_seconds.

    The allowed difference starts at `tolerance` seconds and is widened one
    second at a time up to AND INCLUDING `max_tolerance`. Within a given
    tolerance the earliest entry of `videos` wins. Returns None when nothing
    matches even at the widest tolerance.
    """
    for allowed_gap in range(tolerance, max_tolerance + 1):
        for video in videos:
            if abs(video['seconds'] - target_seconds) <= allowed_gap:
                return video
    return None


def generate_youtube_url(raw_song, meta_tags, tries_remaining=5):
    """ Search for the song on YouTube and generate a URL to its video.

    raw_song: search text used when no Spotify metadata is available.
    meta_tags: Spotify metadata for the track, or a falsy value if unknown.
        When present, the query is built with generate_songname() and
        'duration_ms' is used to choose a video of similar length.
    tries_remaining: guard value; the function gives up when it is 0.

    Returns the watch URL as a string, or None when there are no tries left,
    the search yields nothing, the user skips the song in manual mode, or no
    result has a duration close enough to the Spotify track.
    """
    # prevents an infinite loop but allows for a few retries
    if tries_remaining == 0:
        log.debug('No tries left. I quit.')
        return

    query = {'part': 'snippet',
             'maxResults': 50,
             'type': 'video'}

    if args.music_videos_only:
        query['videoCategoryId'] = '10'

    song = generate_songname(meta_tags) if meta_tags else raw_song
    query['q'] = song
    log.debug('Query: {0}'.format(query))

    data = pafy.call_gdata('search', query)
    # second request: fetch details (incl. duration) for every search hit
    query2 = {'part': 'contentDetails,snippet,statistics',
              'maxResults': 50,
              'id': ','.join(i['id']['videoId'] for i in data['items'])}
    log.debug('Query2: {0}'.format(query2))

    vdata = pafy.call_gdata('videos', query2)
    videos = []
    for x in vdata['items']:
        duration_s = pafy.playlist.parseISO8591(x['contentDetails']['duration'])
        youtubedetails = {'link': x['id'], 'title': x['snippet']['title'],
                          'videotime': internals.videotime_from_seconds(duration_s),
                          'seconds': duration_s}
        videos.append(youtubedetails)
        # without metadata only the first result is ever considered
        if not meta_tags:
            break

    if not videos:
        return None

    log.debug(pprint.pformat(videos))

    if args.manual:
        log.info(song)
        log.info('0. Skip downloading this song.\n')
        # fetch all video links on first page on YouTube
        for i, v in enumerate(videos):
            log.info(u'{0}. {1} {2} {3}'.format(i+1, v['title'], v['videotime'],
                                                "http://youtube.com/watch?v="+v['link']))
        # let user select the song to download
        result = internals.input_link(videos)
        if not result:
            return None
    elif not meta_tags:
        # if the metadata could not be acquired, take the first result
        # from Youtube because the proper song length is unknown
        result = videos[0]
        log.debug('Since no metadata found on Spotify, going with the first result')
    else:
        # keep only a video whose length is similar to the Spotify song;
        # start with a reasonable tolerance and widen it until a result
        # fits or the maximum tolerance has been tried as well
        target_seconds = int(meta_tags['duration_ms']) / 1000
        result = _pick_video_by_duration(videos, target_seconds,
                                         tolerance=10, max_tolerance=20)
        if result is None:
            log.error("{0} by {1} was not found.\n".format(meta_tags['name'], meta_tags['artists'][0]['name']))
            return None

    return "http://youtube.com/watch?v=" + result['link']
def go_pafy(raw_song, meta_tags=None):
    """Resolve the input to a pafy object for its YouTube video.

    A YouTube URL is loaded directly; anything else is first searched for
    with generate_youtube_url(). Returns None when the search finds no URL.
    """
    if internals.is_youtube(raw_song):
        return pafy.new(raw_song)

    track_url = generate_youtube_url(raw_song, meta_tags)
    return pafy.new(track_url) if track_url else None
def get_youtube_title(content, number=None):
    """Return the video's title, prefixed with '[number]. ' when a number is given.

    A falsy number (None or 0) leaves the title without a prefix.
    """
    title = content.title
    if not number:
        return title
    return '{0}. {1}'.format(number, title)
def download_song(file_name, content):
    """Fetch the best audio stream of `content` into args.folder.

    The stream type is taken from args.input_ext (only '.webm' and '.m4a'
    are accepted). Returns True once the download call has completed and
    False when the extension is unsupported or no audio stream is offered.
    """
    if args.input_ext not in (".webm", ".m4a"):
        return False

    link = content.getbestaudio(preftype=args.input_ext[1:])
    if not link:
        return False

    log.debug('Downloading from URL: ' + link.url)
    target_base = os.path.join(args.folder, file_name)
    filepath = '{0}{1}'.format(target_base, args.input_ext)
    log.debug('Saving to: ' + filepath)
    link.download(filepath=filepath)
    return True
def check_exists(music_file, raw_song, meta_tags):
    """ Check if the input song already exists in the given folder.

    Also deletes any leftover '.temp' files found in args.folder.

    music_file: base file name of the song about to be downloaded.
    raw_song: the original user input; for Spotify inputs an existing file
        is additionally checked against `meta_tags`.
    meta_tags: Spotify metadata passed to metadata.compare().

    Returns True when an existing file should be kept (skip the download)
    and False when the song has to be downloaded. An existing file is
    removed before False is returned for it.
    """
    log.debug('Cleaning any temp files and checking '
              'if "{}" already exists'.format(music_file))
    # loop-invariant, so computed once instead of once per directory entry
    file_name = internals.sanitize_title(music_file)

    for song in os.listdir(args.folder):
        song_path = os.path.join(args.folder, song)

        if song.endswith('.temp'):
            os.remove(song_path)
            continue

        # check if any song with similar name is already present in the given folder
        if not song.startswith(file_name):
            continue

        log.debug('Found an already existing song: "{}"'.format(song))
        if internals.is_spotify(raw_song):
            # check if the already downloaded song has correct metadata
            # if not, remove it and download again without prompt
            already_tagged = metadata.compare(song_path, meta_tags)
            # the value must go through str.format(): handing it over as a
            # logging argument with a '{}' template fails to format
            log.debug('Checking if it is already tagged correctly? {}'.format(
                already_tagged))
            if not already_tagged:
                os.remove(song_path)
                return False

        log.warning('"{}" already exists'.format(song))
        if args.overwrite == 'prompt':
            log.info('"{}" has already been downloaded. '
                     'Re-download? (y/N): '.format(song))
            prompt = input('> ')
            if prompt.lower() == 'y':
                os.remove(song_path)
                return False
            return True
        elif args.overwrite == 'force':
            os.remove(song_path)
            log.info('Overwriting "{}"'.format(song))
            return False
        elif args.overwrite == 'skip':
            log.info('Skipping "{}"'.format(song))
            return True
        # any other overwrite mode: keep scanning the remaining files

    return False
def grab_list(text_file):
    """ Download all songs from the list.

    text_file: path of a text file holding one song per line.

    Each song is handed to grab_single() together with a running number.
    After a successful download internals.trim_song() is called on the
    file. When a download fails with a network-type error the song is
    queued again at the end of both the in-memory list and the file.
    """
    # the module-level Spotify client is replaced when its token expires
    global spotify

    with open(text_file, 'r') as listed:
        # ignore ALL blank lines in text_file (if any), not just the first
        lines = [line for line in listed.read().splitlines() if line]

    log.info(u'Preparing to download {} songs'.format(len(lines)))
    number = 1

    # NOTE: `lines` is appended to while being iterated on purpose, so that
    # failed songs are retried after the remaining ones
    for raw_song in lines:
        print('')
        try:
            grab_single(raw_song, number=number)
        # token expires after 1 hour
        except spotipy.client.SpotifyException:
            # refresh token when it expires
            log.debug('Token expired, generating new one and authorizing')
            new_token = spotify_tools.generate_token()
            spotify = spotipy.Spotify(auth=new_token)
            grab_single(raw_song, number=number)
        # detect network problems
        except (urllib.request.URLError, TypeError, IOError):
            lines.append(raw_song)
            # remove the downloaded song from file
            internals.trim_song(text_file)
            # and append it at the end of file
            with open(text_file, 'a') as myfile:
                myfile.write(raw_song + '\n')
            log.warning('Failed to download song. Will retry after other songs\n')
            # wait 0.5 sec to avoid infinite looping
            time.sleep(0.5)
            continue

        log.debug('Removing downloaded song from text file')
        internals.trim_song(text_file)
        number += 1
def grab_playlist(playlist):
    """Split a playlist reference and pass it to spotify_tools.write_playlist().

    `playlist` is split on '/' when it contains one (a single trailing '/'
    is dropped first) and on ':' otherwise. The third-from-last component
    is taken as the username and the last one as the playlist id.

    Exits with status 10 when fewer than three components are present and
    with status 11 when Spotify raises an error for the playlist.
    """
    if '/' in playlist:
        trimmed = playlist[:-1] if playlist.endswith('/') else playlist
        splits = trimmed.split('/')
    else:
        splits = playlist.split(':')

    if len(splits) < 3:
        # Wrong format, in either case
        log.error('The provided playlist URL is not in a recognized format!')
        sys.exit(10)

    username, playlist_id = splits[-3], splits[-1]
    try:
        spotify_tools.write_playlist(username, playlist_id)
    except spotipy.client.SpotifyException:
        log.error('Unable to find playlist')
        log.info('Make sure the playlist is set to publicly visible and then try again')
        sys.exit(11)
def grab_single(raw_song, number=None):
    """ Logic behind downloading a song.

    raw_song: the user's input for one song; a YouTube URL is loaded
        directly, anything else goes through the Spotify metadata lookup
        and a YouTube search.
    number: optional position in a list, used only to prefix the title
        that gets logged.

    Nothing is downloaded when no matching video is found, when
    args.dry_run is set, or when check_exists() reports that the existing
    file should be kept.
    """
    if internals.is_youtube(raw_song):
        log.debug('Input song is a YouTube URL')
        content = go_pafy(raw_song, meta_tags=None)
        # turn the video title into plain search text for the metadata lookup
        raw_song = slugify(content.title).replace('-', ' ')
        meta_tags = spotify_tools.generate_metadata(raw_song)
    else:
        meta_tags = spotify_tools.generate_metadata(raw_song)
        content = go_pafy(raw_song, meta_tags)

    if not content:
        log.debug('Found no matching video')
        return

    # "[number]. [artist] - [song]" if downloading from list
    # otherwise "[artist] - [song]"
    youtube_title = get_youtube_title(content, number)
    log.info('{} ({})'.format(youtube_title, content.watchv_url))

    # generate file name of the song to download
    songname = content.title
    if meta_tags:
        refined_songname = generate_songname(meta_tags)
        log.debug('Refining songname from "{0}" to "{1}"'.format(songname, refined_songname))
        # ' - ' means both artist and title were empty; keep the video title
        if not refined_songname == ' - ':
            songname = refined_songname

    if args.dry_run:
        return

    file_name = internals.sanitize_title(songname)

    if check_exists(file_name, raw_song, meta_tags):
        return

    if not download_song(file_name, content):
        log.error('No audio streams available')
        return

    input_song = file_name + args.input_ext
    output_song = file_name + args.output_ext
    print('')
    try:
        convert.song(input_song, output_song, args.folder,
                     avconv=args.avconv)
    except FileNotFoundError:
        encoder = 'avconv' if args.avconv else 'ffmpeg'
        log.warning('Could not find {0}, skipping conversion'.format(encoder))
        # no encoder available: keep the downloaded file in its input format
        args.output_ext = args.input_ext
        output_song = file_name + args.output_ext

    if not args.input_ext == args.output_ext:
        os.remove(os.path.join(args.folder, input_song))

    if not args.no_metadata:
        # test the track's metadata, not the `metadata` module (which is
        # always truthy and made the warning below unreachable)
        if meta_tags:
            metadata.embed(os.path.join(args.folder, output_song), meta_tags)
        else:
            log.warning('Could not find metadata')
# token is mandatory when using Spotify's API
# https://developer.spotify.com/news-stories/2017/01/27/removing-unauthenticated-calls-to-the-web-api/
# NOTE: these two statements run at import time, not only when the file is
# executed as a script. `spotify` is the module-level client; grab_list()
# rebinds it with a fresh token when a SpotifyException is raised.
token = spotify_tools.generate_token()
spotify = spotipy.Spotify(auth=token)
if __name__ == '__main__':
    # `args` and `log` are module-level names read by the functions above
    args = internals.get_arguments()
    internals.filter_path(args.folder)

    log = logger.logzero.setup_logger(formatter=logger.formatter,
                                      level=args.log_level)
    logger.log = log

    log.debug('Python version: {}'.format(sys.version))
    log.debug('Platform: {}'.format(platform.platform()))
    log.debug(pprint.pformat(args.__dict__))

    # command-line options in priority order; only the first one that
    # carries a value is acted upon
    modes = (
        ('song', lambda value: grab_single(raw_song=value)),
        ('list', lambda value: grab_list(text_file=value)),
        ('playlist', lambda value: grab_playlist(playlist=value)),
        ('album', lambda value: spotify_tools.grab_album(album=value)),
        ('username', lambda value: spotify_tools.feed_playlist(username=value)),
    )

    try:
        for option, action in modes:
            value = getattr(args, option)
            if value:
                action(value)
                break

        # Actually we don't necessarily need this, but yeah...
        # Explicit is better than implicit!
        sys.exit(0)

    except KeyboardInterrupt as interrupt:
        log.exception(interrupt)
        sys.exit(3)