mirror of
https://github.com/KevinMidboe/spotify-downloader.git
synced 2025-12-08 20:39:08 +00:00
Add logging capability (#175)
* Refactoring and addition of logzero (#172) * Refactored convert.py and added logging. * Added logging and refactored. * Added logzero to requirements.txt * Added logging to metadata.py * Created a log in misc.py. Updated slugify import. * Some general improvement * Improve message layout * Improve test mechanism * Implement debug level logging * Fix some minor mistakes * Make pytest happy * Remove unimplemented --verbose option * Update ISSUE_TEMPLATE.md * Rename LICENSE * Remove obvious from log.debug() * Show track URL when writing to file (debug)
This commit is contained in:
279
spotdl.py
279
spotdl.py
@@ -1,9 +1,10 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: UTF-8 -*-
|
||||
|
||||
from core import logger
|
||||
from core import metadata
|
||||
from core import convert
|
||||
from core import misc
|
||||
from core import internals
|
||||
from bs4 import BeautifulSoup
|
||||
from titlecase import titlecase
|
||||
from slugify import slugify
|
||||
@@ -13,21 +14,26 @@ import urllib.request
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import sys
|
||||
import platform
|
||||
import pprint
|
||||
|
||||
|
||||
def generate_songname(tags):
|
||||
"""Generate a string of the format '[artist] - [song]' for the given spotify song."""
|
||||
""" Generate a string of the format '[artist] - [song]' for the given spotify song. """
|
||||
raw_song = u'{0} - {1}'.format(tags['artists'][0]['name'], tags['name'])
|
||||
return raw_song
|
||||
|
||||
|
||||
def generate_metadata(raw_song):
|
||||
"""Fetch a song's metadata from Spotify."""
|
||||
if misc.is_spotify(raw_song):
|
||||
""" Fetch a song's metadata from Spotify. """
|
||||
if internals.is_spotify(raw_song):
|
||||
# fetch track information directly if it is spotify link
|
||||
log.debug('Fetching metadata for given track URL')
|
||||
meta_tags = spotify.track(raw_song)
|
||||
else:
|
||||
# otherwise search on spotify and fetch information from first result
|
||||
log.debug('Searching for "{}" on Spotify'.format(raw_song))
|
||||
try:
|
||||
meta_tags = spotify.search(raw_song, limit=1)['tracks']['items'][0]
|
||||
except IndexError:
|
||||
@@ -52,49 +58,64 @@ def generate_metadata(raw_song):
|
||||
meta_tags[u'publisher'] = album['label']
|
||||
meta_tags[u'total_tracks'] = album['tracks']['total']
|
||||
|
||||
log.debug(pprint.pformat(meta_tags))
|
||||
return meta_tags
|
||||
|
||||
|
||||
def is_video(result):
|
||||
# ensure result is not a channel
|
||||
not_video = result.find('channel') is not None or \
|
||||
'yt-lockup-channel' in result.parent.attrs['class'] or \
|
||||
'yt-lockup-channel' in result.attrs['class']
|
||||
|
||||
# ensure result is not a mix/playlist
|
||||
not_video = not_video or \
|
||||
'yt-lockup-playlist' in result.parent.attrs['class']
|
||||
|
||||
# ensure video result is not an advertisement
|
||||
not_video = not_video or \
|
||||
result.find('googleads') is not None
|
||||
|
||||
video = not not_video
|
||||
return video
|
||||
|
||||
|
||||
def generate_youtube_url(raw_song, meta_tags, tries_remaining=5):
|
||||
"""Search for the song on YouTube and generate a URL to its video."""
|
||||
""" Search for the song on YouTube and generate a URL to its video. """
|
||||
# prevents an infinite loop but allows for a few retries
|
||||
if tries_remaining == 0:
|
||||
log.debug('No tries left. I quit.')
|
||||
return
|
||||
|
||||
if meta_tags is None:
|
||||
song = raw_song
|
||||
search_url = misc.generate_search_url(song, viewsort=False)
|
||||
search_url = internals.generate_search_url(song, viewsort=False)
|
||||
else:
|
||||
song = generate_songname(meta_tags)
|
||||
search_url = misc.generate_search_url(song, viewsort=True)
|
||||
search_url = internals.generate_search_url(song, viewsort=True)
|
||||
log.debug('Opening URL: {0}'.format(search_url))
|
||||
|
||||
item = urllib.request.urlopen(search_url).read()
|
||||
# item = unicode(item, 'utf-8')
|
||||
items_parse = BeautifulSoup(item, "html.parser")
|
||||
|
||||
videos = []
|
||||
for x in items_parse.find_all('div', {'class': 'yt-lockup-dismissable yt-uix-tile'}):
|
||||
# ensure result is not a channel
|
||||
if x.find('channel') is not None or 'yt-lockup-channel' in x.parent.attrs['class'] or 'yt-lockup-channel' in x.attrs['class']:
|
||||
continue
|
||||
|
||||
# ensure result is not a mix/playlist
|
||||
if 'yt-lockup-playlist' in x.parent.attrs['class']:
|
||||
continue
|
||||
|
||||
# confirm the video result is not an advertisement
|
||||
if x.find('googleads') is not None:
|
||||
if not is_video(x):
|
||||
continue
|
||||
|
||||
y = x.find('div', class_='yt-lockup-content')
|
||||
link = y.find('a')['href']
|
||||
title = y.find('a')['title']
|
||||
|
||||
try:
|
||||
videotime = x.find('span', class_="video-time").get_text()
|
||||
except AttributeError:
|
||||
log.debug('Could not find video duration on YouTube, retrying..')
|
||||
return generate_youtube_url(raw_song, meta_tags, tries_remaining - 1)
|
||||
|
||||
youtubedetails = {'link': link, 'title': title, 'videotime': videotime, 'seconds':misc.get_sec(videotime)}
|
||||
youtubedetails = {'link': link, 'title': title, 'videotime': videotime,
|
||||
'seconds': internals.get_sec(videotime)}
|
||||
videos.append(youtubedetails)
|
||||
if meta_tags is None:
|
||||
break
|
||||
@@ -102,20 +123,26 @@ def generate_youtube_url(raw_song, meta_tags, tries_remaining=5):
|
||||
if not videos:
|
||||
return None
|
||||
|
||||
log.debug(pprint.pformat(videos))
|
||||
|
||||
if args.manual:
|
||||
print(song)
|
||||
print('')
|
||||
print('0. Skip downloading this song')
|
||||
log.info(song)
|
||||
log.info('0. Skip downloading this song.\n')
|
||||
# fetch all video links on first page on YouTube
|
||||
for i, v in enumerate(videos):
|
||||
print(u'{0}. {1} {2} {3}'.format(i+1, v['title'], v['videotime'], "http://youtube.com"+v['link']))
|
||||
print('')
|
||||
log.info(u'{0}. {1} {2} {3}'.format(i+1, v['title'], v['videotime'],
|
||||
"http://youtube.com"+v['link']))
|
||||
# let user select the song to download
|
||||
result = misc.input_link(videos)
|
||||
result = internals.input_link(videos)
|
||||
if result is None:
|
||||
return None
|
||||
else:
|
||||
if meta_tags is not None:
|
||||
if meta_tags is None:
|
||||
# if the metadata could not be acquired, take the first result
|
||||
# from Youtube because the proper song length is unknown
|
||||
result = videos[0]
|
||||
log.debug('Since no metadata found on Spotify, going with the first result')
|
||||
else:
|
||||
# filter out videos that do not have a similar length to the Spotify song
|
||||
duration_tolerance = 10
|
||||
max_duration_tolerance = 20
|
||||
@@ -130,24 +157,23 @@ def generate_youtube_url(raw_song, meta_tags, tries_remaining=5):
|
||||
possible_videos_by_duration = list(filter(lambda x: abs(x['seconds'] - (int(meta_tags['duration_ms'])/1000)) <= duration_tolerance, videos))
|
||||
duration_tolerance += 1
|
||||
if duration_tolerance > max_duration_tolerance:
|
||||
print(meta_tags['name'], 'by', meta_tags['artists'][0]['name'], 'was not found')
|
||||
log.error("{0} by {1} was not found.\n".format(meta_tags['name'],meta_tags['artists'][0]['name']))
|
||||
return None
|
||||
|
||||
result = possible_videos_by_duration[0]
|
||||
else:
|
||||
# if the metadata could not be acquired, take the first result from Youtube because the proper song length is unknown
|
||||
result = videos[0]
|
||||
|
||||
full_link = None
|
||||
if result:
|
||||
full_link = u'youtube.com{0}'.format(result['link'])
|
||||
full_link = u'http://youtube.com{0}'.format(result['link'])
|
||||
else:
|
||||
full_link = None
|
||||
|
||||
log.debug('Best matching video link: {}'.format(full_link))
|
||||
return full_link
|
||||
|
||||
|
||||
def go_pafy(raw_song, meta_tags):
|
||||
"""Parse track from YouTube."""
|
||||
if misc.is_youtube(raw_song):
|
||||
def go_pafy(raw_song, meta_tags=None):
|
||||
""" Parse track from YouTube. """
|
||||
if internals.is_youtube(raw_song):
|
||||
track_info = pafy.new(raw_song)
|
||||
else:
|
||||
track_url = generate_youtube_url(raw_song, meta_tags)
|
||||
@@ -161,7 +187,7 @@ def go_pafy(raw_song, meta_tags):
|
||||
|
||||
|
||||
def get_youtube_title(content, number=None):
|
||||
"""Get the YouTube video's title."""
|
||||
""" Get the YouTube video's title. """
|
||||
title = content.title
|
||||
if number is None:
|
||||
return title
|
||||
@@ -170,7 +196,7 @@ def get_youtube_title(content, number=None):
|
||||
|
||||
|
||||
def feed_playlist(username):
|
||||
"""Fetch user playlists when using the -u option."""
|
||||
""" Fetch user playlists when using the -u option. """
|
||||
playlists = spotify.user_playlists(username)
|
||||
links = []
|
||||
check = 1
|
||||
@@ -180,9 +206,10 @@ def feed_playlist(username):
|
||||
# in rare cases, playlists may not be found, so playlists['next']
|
||||
# is None. Skip these. Also see Issue #91.
|
||||
if playlist['name'] is not None:
|
||||
print(u'{0:>5}. {1:<30} ({2} tracks)'.format(
|
||||
log.info(u'{0:>5}. {1:<30} ({2} tracks)'.format(
|
||||
check, playlist['name'],
|
||||
playlist['tracks']['total']))
|
||||
log.debug(playlist['external_urls']['spotify'])
|
||||
links.append(playlist)
|
||||
check += 1
|
||||
if playlists['next']:
|
||||
@@ -190,9 +217,7 @@ def feed_playlist(username):
|
||||
else:
|
||||
break
|
||||
|
||||
print('')
|
||||
playlist = misc.input_link(links)
|
||||
print('')
|
||||
playlist = internals.input_link(links)
|
||||
write_playlist(playlist['owner']['id'], playlist['id'])
|
||||
|
||||
|
||||
@@ -205,9 +230,11 @@ def write_tracks(text_file, tracks):
|
||||
else:
|
||||
track = item
|
||||
try:
|
||||
file_out.write(track['external_urls']['spotify'] + '\n')
|
||||
track_url = track['external_urls']['spotify']
|
||||
file_out.write(track_url + '\n')
|
||||
log.debug(track_url)
|
||||
except KeyError:
|
||||
print(u'Skipping track {0} by {1} (local only?)'.format(
|
||||
log.warning(u'Skipping track {0} by {1} (local only?)'.format(
|
||||
track['name'], track['artists'][0]['name']))
|
||||
# 1 page = 50 results
|
||||
# check if there are more pages
|
||||
@@ -218,11 +245,11 @@ def write_tracks(text_file, tracks):
|
||||
|
||||
|
||||
def write_playlist(username, playlist_id):
|
||||
results = spotify.user_playlist(
|
||||
username, playlist_id, fields='tracks,next,name')
|
||||
results = spotify.user_playlist(username, playlist_id,
|
||||
fields='tracks,next,name')
|
||||
text_file = u'{0}.txt'.format(slugify(results['name'], ok='-_()[]{}'))
|
||||
|
||||
print(u'Feeding {0} tracks to {1}'.format(results['tracks']['total'], text_file))
|
||||
log.info(u'Writing {0} tracks to {1}'.format(
|
||||
results['tracks']['total'], text_file))
|
||||
tracks = results['tracks']
|
||||
write_tracks(text_file, tracks)
|
||||
|
||||
@@ -230,54 +257,61 @@ def write_playlist(username, playlist_id):
|
||||
def write_album(album):
|
||||
tracks = spotify.album_tracks(album['id'])
|
||||
text_file = u'{0}.txt'.format(slugify(album['name'], ok='-_()[]{}'))
|
||||
print(u'Feeding {0} tracks to {1}'.format(tracks['total'], text_file))
|
||||
|
||||
log.info(u'writing {0} tracks to {1}'.format(
|
||||
tracks['total'], text_file))
|
||||
write_tracks(text_file, tracks)
|
||||
|
||||
|
||||
def download_song(file_name, content):
|
||||
"""Download the audio file from YouTube."""
|
||||
""" Download the audio file from YouTube. """
|
||||
if args.input_ext in (".webm", ".m4a"):
|
||||
link = content.getbestaudio(preftype=args.input_ext[1:])
|
||||
else:
|
||||
return False
|
||||
|
||||
log.debug('Downloading from URL: ' + link.url)
|
||||
if link is None:
|
||||
return False
|
||||
else:
|
||||
link.download(
|
||||
filepath='{0}{1}'.format(os.path.join(args.folder, file_name), args.input_ext))
|
||||
filepath = '{0}{1}'.format(os.path.join(args.folder, file_name),
|
||||
args.input_ext)
|
||||
link.download(filepath=filepath)
|
||||
return True
|
||||
|
||||
|
||||
def check_exists(music_file, raw_song, meta_tags, islist=True):
|
||||
"""Check if the input song already exists in the given folder."""
|
||||
""" Check if the input song already exists in the given folder. """
|
||||
log.debug('Cleaning any temp files and checking '
|
||||
'if "{}" already exists'.format(music_file))
|
||||
songs = os.listdir(args.folder)
|
||||
for song in songs:
|
||||
if song.endswith('.temp'):
|
||||
os.remove(os.path.join(args.folder, song))
|
||||
continue
|
||||
# check if any song with similar name is already present in the given folder
|
||||
file_name = misc.sanitize_title(music_file)
|
||||
file_name = internals.sanitize_title(music_file)
|
||||
if song.startswith(file_name):
|
||||
# check if the already downloaded song has correct metadata
|
||||
already_tagged = metadata.compare(os.path.join(args.folder, song), meta_tags)
|
||||
log.debug('Found an already existing song: "{}"'.format(song))
|
||||
if internals.is_spotify(raw_song):
|
||||
# check if the already downloaded song has correct metadata
|
||||
# if not, remove it and download again without prompt
|
||||
already_tagged = metadata.compare(os.path.join(args.folder, song),
|
||||
meta_tags)
|
||||
log.debug('Checking if it is already tagged correctly? {}',
|
||||
already_tagged)
|
||||
if not already_tagged:
|
||||
os.remove(os.path.join(args.folder, song))
|
||||
return False
|
||||
|
||||
# if not, remove it and download again without prompt
|
||||
if misc.is_spotify(raw_song) and not already_tagged:
|
||||
os.remove(os.path.join(args.folder, song))
|
||||
return False
|
||||
|
||||
# do not prompt and skip the current song
|
||||
# if already downloaded when using list
|
||||
if islist:
|
||||
print('Song already exists')
|
||||
return True
|
||||
# if downloading only single song, prompt to re-download
|
||||
if islist:
|
||||
log.warning('Song already exists')
|
||||
return True
|
||||
else:
|
||||
prompt = input('Song with same name has already been downloaded. '
|
||||
'Re-download? (y/n): ').lower()
|
||||
if prompt == 'y':
|
||||
log.info('Song with same name has already been downloaded. '
|
||||
'Re-download? (y/N): ')
|
||||
prompt = input('> ')
|
||||
if prompt.lower() == 'y':
|
||||
os.remove(os.path.join(args.folder, song))
|
||||
return False
|
||||
else:
|
||||
@@ -286,7 +320,7 @@ def check_exists(music_file, raw_song, meta_tags, islist=True):
|
||||
|
||||
|
||||
def grab_list(text_file):
|
||||
"""Download all songs from the list."""
|
||||
""" Download all songs from the list. """
|
||||
with open(text_file, 'r') as listed:
|
||||
lines = (listed.read()).splitlines()
|
||||
# ignore blank lines in text_file (if any)
|
||||
@@ -294,37 +328,36 @@ def grab_list(text_file):
|
||||
lines.remove('')
|
||||
except ValueError:
|
||||
pass
|
||||
print(u'Total songs in list: {0} songs'.format(len(lines)))
|
||||
print('')
|
||||
# nth input song
|
||||
log.info(u'Preparing to download {} songs'.format(len(lines)))
|
||||
number = 1
|
||||
|
||||
for raw_song in lines:
|
||||
print('')
|
||||
try:
|
||||
grab_single(raw_song, number=number)
|
||||
# token expires after 1 hour
|
||||
except spotipy.client.SpotifyException:
|
||||
# refresh token when it expires
|
||||
new_token = misc.generate_token()
|
||||
log.debug('Token expired, generating new one and authorizing')
|
||||
new_token = internals.generate_token()
|
||||
global spotify
|
||||
spotify = spotipy.Spotify(auth=new_token)
|
||||
grab_single(raw_song, number=number)
|
||||
# detect network problems
|
||||
except (urllib.request.URLError, TypeError, IOError):
|
||||
lines.append(raw_song)
|
||||
# remove the downloaded song from .txt
|
||||
misc.trim_song(text_file)
|
||||
# and append it to the last line in .txt
|
||||
# remove the downloaded song from file
|
||||
internals.trim_song(text_file)
|
||||
# and append it at the end of file
|
||||
with open(text_file, 'a') as myfile:
|
||||
myfile.write(raw_song + '\n')
|
||||
print('Failed to download song. Will retry after other songs.')
|
||||
log.warning('Failed to download song. Will retry after other songs\n')
|
||||
# wait 0.5 sec to avoid infinite looping
|
||||
time.sleep(0.5)
|
||||
continue
|
||||
except KeyboardInterrupt:
|
||||
misc.grace_quit()
|
||||
finally:
|
||||
print('')
|
||||
misc.trim_song(text_file)
|
||||
|
||||
log.debug('Removing downloaded song from text file')
|
||||
internals.trim_song(text_file)
|
||||
number += 1
|
||||
|
||||
|
||||
@@ -340,14 +373,14 @@ def grab_playlist(playlist):
|
||||
username = splits[-3]
|
||||
except IndexError:
|
||||
# Wrong format, in either case
|
||||
print('The provided playlist URL is not in a recognized format!')
|
||||
log.error('The provided playlist URL is not in a recognized format!')
|
||||
sys.exit(10)
|
||||
playlist_id = splits[-1]
|
||||
try:
|
||||
write_playlist(username, playlist_id)
|
||||
except spotipy.client.SpotifyException:
|
||||
print('Unable to find playlist')
|
||||
print('Make sure the playlist is set to publicly visible and then try again')
|
||||
log.error('Unable to find playlist')
|
||||
log.info('Make sure the playlist is set to publicly visible and then try again')
|
||||
sys.exit(11)
|
||||
|
||||
|
||||
@@ -366,74 +399,84 @@ def grab_album(album):
|
||||
|
||||
|
||||
def grab_single(raw_song, number=None):
|
||||
"""Logic behind downloading a song."""
|
||||
""" Logic behind downloading a song. """
|
||||
if number:
|
||||
islist = True
|
||||
else:
|
||||
islist = False
|
||||
|
||||
if misc.is_youtube(raw_song):
|
||||
if internals.is_youtube(raw_song):
|
||||
log.debug('Input song is a YouTube URL')
|
||||
content = go_pafy(raw_song, meta_tags=None)
|
||||
raw_song = slugify(content.title).replace('-', ' ')
|
||||
meta_tags = generate_metadata(raw_song)
|
||||
else:
|
||||
meta_tags = generate_metadata(raw_song)
|
||||
content = go_pafy(raw_song, meta_tags)
|
||||
|
||||
meta_tags = generate_metadata(raw_song)
|
||||
content = go_pafy(raw_song, meta_tags)
|
||||
if content is None:
|
||||
log.debug('Found no matching video')
|
||||
return
|
||||
|
||||
# print '[number]. [artist] - [song]' if downloading from list
|
||||
# otherwise print '[artist] - [song]'
|
||||
print(get_youtube_title(content, number))
|
||||
# log '[number]. [artist] - [song]' if downloading from list
|
||||
# otherwise log '[artist] - [song]'
|
||||
log.info(get_youtube_title(content, number))
|
||||
# generate file name of the song to download
|
||||
songname = content.title
|
||||
|
||||
if meta_tags is not None:
|
||||
refined_songname = generate_songname(meta_tags)
|
||||
log.debug('Refining songname from "{0}" to "{1}"'.format(songname, refined_songname))
|
||||
if not refined_songname == ' - ':
|
||||
songname = refined_songname
|
||||
|
||||
file_name = misc.sanitize_title(songname)
|
||||
file_name = internals.sanitize_title(songname)
|
||||
|
||||
if not check_exists(file_name, raw_song, meta_tags, islist=islist):
|
||||
if download_song(file_name, content):
|
||||
print('')
|
||||
input_song = file_name + args.input_ext
|
||||
output_song = file_name + args.output_ext
|
||||
print('')
|
||||
convert.song(input_song, output_song, args.folder,
|
||||
avconv=args.avconv, verbose=args.verbose)
|
||||
avconv=args.avconv)
|
||||
if not args.input_ext == args.output_ext:
|
||||
os.remove(os.path.join(args.folder, input_song))
|
||||
|
||||
if not args.no_metadata:
|
||||
metadata.embed(os.path.join(args.folder, output_song), meta_tags)
|
||||
else:
|
||||
print('No audio streams available')
|
||||
log.error('No audio streams available')
|
||||
|
||||
|
||||
class TestArgs(object):
|
||||
manual = False
|
||||
input_ext = '.m4a'
|
||||
output_ext = '.mp3'
|
||||
folder = 'Music/'
|
||||
|
||||
# token is mandatory when using Spotify's API
|
||||
# https://developer.spotify.com/news-stories/2017/01/27/removing-unauthenticated-calls-to-the-web-api/
|
||||
token = misc.generate_token()
|
||||
token = internals.generate_token()
|
||||
spotify = spotipy.Spotify(auth=token)
|
||||
|
||||
if __name__ == '__main__':
|
||||
args = misc.get_arguments()
|
||||
misc.filter_path(args.folder)
|
||||
args = internals.get_arguments()
|
||||
internals.filter_path(args.folder)
|
||||
|
||||
if args.song:
|
||||
grab_single(raw_song=args.song)
|
||||
elif args.list:
|
||||
grab_list(text_file=args.list)
|
||||
elif args.playlist:
|
||||
grab_playlist(playlist=args.playlist)
|
||||
elif args.album:
|
||||
grab_album(album=args.album)
|
||||
elif args.username:
|
||||
feed_playlist(username=args.username)
|
||||
else:
|
||||
misc.filter_path('Music')
|
||||
args = TestArgs()
|
||||
logger.log = logger.logzero.setup_logger(formatter=logger.formatter,
|
||||
level=args.log_level)
|
||||
log = logger.log
|
||||
log.debug('Python version: {}'.format(sys.version))
|
||||
log.debug('Platform: {}'.format(platform.platform()))
|
||||
log.debug(pprint.pformat(args.__dict__))
|
||||
|
||||
try:
|
||||
if args.song:
|
||||
grab_single(raw_song=args.song)
|
||||
elif args.list:
|
||||
grab_list(text_file=args.list)
|
||||
elif args.playlist:
|
||||
grab_playlist(playlist=args.playlist)
|
||||
elif args.album:
|
||||
grab_album(album=args.album)
|
||||
elif args.username:
|
||||
feed_playlist(username=args.username)
|
||||
sys.exit(0)
|
||||
|
||||
except KeyboardInterrupt as e:
|
||||
log.exception(e)
|
||||
sys.exit(-1)
|
||||
|
||||
Reference in New Issue
Block a user