Add logging capability (#175)

* Refactoring and addition of logzero (#172)
* Refactored convert.py and added logging.
* Added logging and refactored.
* Added logzero to requirements.txt
* Added logging to metadata.py
* Created a log in misc.py. Updated slugify import.
* Some general improvement
* Improve message layout
* Improve test mechanism
* Implement debug level logging
* Fix some minor mistakes
* Make pytest happy
* Remove unimplemented --verbose option
* Update ISSUE_TEMPLATE.md
* Rename LICENSE
* Remove obvious from log.debug()
* Show track URL when writing to file (debug)
2026-02-10 19:39:25 +00:00 · 2017-12-15 19:57:57 +05:30
parent 8ea89f9d1c
commit df513acc35
12 changed files with 319 additions and 216 deletions
--- a/spotdl.py
+++ b/spotdl.py
@@ -1,9 +1,10 @@
 #!/usr/bin/env python
 # -*- coding: UTF-8 -*-

+from core import logger
 from core import metadata
 from core import convert
-from core import misc
+from core import internals
 from bs4 import BeautifulSoup
 from titlecase import titlecase
 from slugify import slugify
@@ -13,21 +14,26 @@ import urllib.request
 import os
 import sys
 import time
+import sys
+import platform
+import pprint


 def generate_songname(tags):
-    """Generate a string of the format '[artist] - [song]' for the given spotify song."""
+    """ Generate a string of the format '[artist] - [song]' for the given spotify song. """
    raw_song = u'{0} - {1}'.format(tags['artists'][0]['name'], tags['name'])
    return raw_song


 def generate_metadata(raw_song):
-    """Fetch a song's metadata from Spotify."""
-    if misc.is_spotify(raw_song):
+    """ Fetch a song's metadata from Spotify. """
+    if internals.is_spotify(raw_song):
        # fetch track information directly if it is spotify link
+        log.debug('Fetching metadata for given track URL')
        meta_tags = spotify.track(raw_song)
    else:
        # otherwise search on spotify and fetch information from first result
+        log.debug('Searching for "{}" on Spotify'.format(raw_song))
        try:
            meta_tags = spotify.search(raw_song, limit=1)['tracks']['items'][0]
        except IndexError:
@@ -52,49 +58,64 @@ def generate_metadata(raw_song):
    meta_tags[u'publisher'] = album['label']
    meta_tags[u'total_tracks'] = album['tracks']['total']

+    log.debug(pprint.pformat(meta_tags))
    return meta_tags


+def is_video(result):
+    # ensure result is not a channel
+    not_video = result.find('channel') is not None or \
+                'yt-lockup-channel' in result.parent.attrs['class'] or \
+                'yt-lockup-channel' in result.attrs['class']
+
+    # ensure result is not a mix/playlist
+    not_video = not_video or \
+               'yt-lockup-playlist' in result.parent.attrs['class']
+
+    # ensure video result is not an advertisement
+    not_video = not_video or \
+                result.find('googleads') is not None
+
+    video = not not_video
+    return video
+
+
 def generate_youtube_url(raw_song, meta_tags, tries_remaining=5):
-    """Search for the song on YouTube and generate a URL to its video."""
+    """ Search for the song on YouTube and generate a URL to its video. """
    # prevents an infinite loop but allows for a few retries
    if tries_remaining == 0:
+        log.debug('No tries left. I quit.')
        return

    if meta_tags is None:
        song = raw_song
-        search_url = misc.generate_search_url(song, viewsort=False)
+        search_url = internals.generate_search_url(song, viewsort=False)
    else:
        song = generate_songname(meta_tags)
-        search_url = misc.generate_search_url(song, viewsort=True)
+        search_url = internals.generate_search_url(song, viewsort=True)
+    log.debug('Opening URL: {0}'.format(search_url))

    item = urllib.request.urlopen(search_url).read()
-    # item = unicode(item, 'utf-8')
    items_parse = BeautifulSoup(item, "html.parser")

    videos = []
    for x in items_parse.find_all('div', {'class': 'yt-lockup-dismissable yt-uix-tile'}):
-        # ensure result is not a channel
-        if x.find('channel') is not None or 'yt-lockup-channel' in x.parent.attrs['class'] or 'yt-lockup-channel' in x.attrs['class']:
-            continue

-        # ensure result is not a mix/playlist
-        if 'yt-lockup-playlist' in x.parent.attrs['class']:
-            continue
-
-        # confirm the video result is not an advertisement
-        if x.find('googleads') is not None:
+        if not is_video(x):
            continue

        y = x.find('div', class_='yt-lockup-content')
        link = y.find('a')['href']
        title = y.find('a')['title']
+
        try:
            videotime = x.find('span', class_="video-time").get_text()
        except AttributeError:
+            log.debug('Could not find video duration on YouTube, retrying..')
            return generate_youtube_url(raw_song, meta_tags, tries_remaining - 1)

-        youtubedetails = {'link': link, 'title': title, 'videotime': videotime, 'seconds':misc.get_sec(videotime)}
+        youtubedetails = {'link': link, 'title': title, 'videotime': videotime,
+                          'seconds': internals.get_sec(videotime)}
        videos.append(youtubedetails)
        if meta_tags is None:
            break
@@ -102,20 +123,26 @@ def generate_youtube_url(raw_song, meta_tags, tries_remaining=5):
    if not videos:
        return None

+    log.debug(pprint.pformat(videos))
+
    if args.manual:
-        print(song)
-        print('')
-        print('0. Skip downloading this song')
+        log.info(song)
+        log.info('0. Skip downloading this song.\n')
        # fetch all video links on first page on YouTube
        for i, v in enumerate(videos):
-          print(u'{0}. {1} {2} {3}'.format(i+1, v['title'], v['videotime'], "http://youtube.com"+v['link']))
-        print('')
+            log.info(u'{0}. {1} {2} {3}'.format(i+1, v['title'], v['videotime'],
+                  "http://youtube.com"+v['link']))
        # let user select the song to download
-        result = misc.input_link(videos)
+        result = internals.input_link(videos)
        if result is None:
            return None
    else:
-        if meta_tags is not None:
+        if meta_tags is None:
+            # if the metadata could not be acquired, take the first result
+            # from Youtube because the proper song length is unknown
+            result = videos[0]
+            log.debug('Since no metadata found on Spotify, going with the first result')
+        else:
            # filter out videos that do not have a similar length to the Spotify song
            duration_tolerance = 10
            max_duration_tolerance = 20
@@ -130,24 +157,23 @@ def generate_youtube_url(raw_song, meta_tags, tries_remaining=5):
                possible_videos_by_duration = list(filter(lambda x: abs(x['seconds'] - (int(meta_tags['duration_ms'])/1000)) <= duration_tolerance, videos))
                duration_tolerance += 1
                if duration_tolerance > max_duration_tolerance:
-                    print(meta_tags['name'], 'by', meta_tags['artists'][0]['name'], 'was not found')
+                    log.error("{0} by {1} was not found.\n".format(meta_tags['name'],meta_tags['artists'][0]['name']))
                    return None

            result = possible_videos_by_duration[0]
-        else:
-            # if the metadata could not be acquired, take the first result from Youtube because the proper song length is unknown
-            result = videos[0]

-    full_link = None
    if result:
-        full_link = u'youtube.com{0}'.format(result['link'])
+        full_link = u'http://youtube.com{0}'.format(result['link'])
+    else:
+        full_link = None

+    log.debug('Best matching video link: {}'.format(full_link))
    return full_link


-def go_pafy(raw_song, meta_tags):
-    """Parse track from YouTube."""
-    if misc.is_youtube(raw_song):
+def go_pafy(raw_song, meta_tags=None):
+    """ Parse track from YouTube. """
+    if internals.is_youtube(raw_song):
        track_info = pafy.new(raw_song)
    else:
        track_url = generate_youtube_url(raw_song, meta_tags)
@@ -161,7 +187,7 @@ def go_pafy(raw_song, meta_tags):


 def get_youtube_title(content, number=None):
-    """Get the YouTube video's title."""
+    """ Get the YouTube video's title. """
    title = content.title
    if number is None:
        return title
@@ -170,7 +196,7 @@ def get_youtube_title(content, number=None):


 def feed_playlist(username):
-    """Fetch user playlists when using the -u option."""
+    """ Fetch user playlists when using the -u option. """
    playlists = spotify.user_playlists(username)
    links = []
    check = 1
@@ -180,9 +206,10 @@ def feed_playlist(username):
            # in rare cases, playlists may not be found, so playlists['next']
            # is None. Skip these. Also see Issue #91.
            if playlist['name'] is not None:
-                print(u'{0:>5}. {1:<30}  ({2} tracks)'.format(
+                log.info(u'{0:>5}. {1:<30}  ({2} tracks)'.format(
                    check, playlist['name'],
                    playlist['tracks']['total']))
+                log.debug(playlist['external_urls']['spotify'])
                links.append(playlist)
                check += 1
        if playlists['next']:
@@ -190,9 +217,7 @@ def feed_playlist(username):
        else:
            break

-    print('')
-    playlist = misc.input_link(links)
-    print('')
+    playlist = internals.input_link(links)
    write_playlist(playlist['owner']['id'], playlist['id'])


@@ -205,9 +230,11 @@ def write_tracks(text_file, tracks):
                else:
                    track = item
                try:
-                    file_out.write(track['external_urls']['spotify'] + '\n')
+                    track_url = track['external_urls']['spotify']
+                    file_out.write(track_url + '\n')
+                    log.debug(track_url)
                except KeyError:
-                    print(u'Skipping track {0} by {1} (local only?)'.format(
+                    log.warning(u'Skipping track {0} by {1} (local only?)'.format(
                        track['name'], track['artists'][0]['name']))
            # 1 page = 50 results
            # check if there are more pages
@@ -218,11 +245,11 @@ def write_tracks(text_file, tracks):


 def write_playlist(username, playlist_id):
-    results = spotify.user_playlist(
-            username, playlist_id, fields='tracks,next,name')
+    results = spotify.user_playlist(username, playlist_id,
+                                    fields='tracks,next,name')
    text_file = u'{0}.txt'.format(slugify(results['name'], ok='-_()[]{}'))
-
-    print(u'Feeding {0} tracks to {1}'.format(results['tracks']['total'], text_file))
+    log.info(u'Writing {0} tracks to {1}'.format(
+               results['tracks']['total'], text_file))
    tracks = results['tracks']
    write_tracks(text_file, tracks)

@@ -230,54 +257,61 @@ def write_playlist(username, playlist_id):
 def write_album(album):
    tracks = spotify.album_tracks(album['id'])
    text_file = u'{0}.txt'.format(slugify(album['name'], ok='-_()[]{}'))
-    print(u'Feeding {0} tracks to {1}'.format(tracks['total'], text_file))
-
+    log.info(u'writing {0} tracks to {1}'.format(
+               tracks['total'], text_file))
    write_tracks(text_file, tracks)


 def download_song(file_name, content):
-    """Download the audio file from YouTube."""
+    """ Download the audio file from YouTube. """
    if args.input_ext in (".webm", ".m4a"):
        link = content.getbestaudio(preftype=args.input_ext[1:])
    else:
        return False

+    log.debug('Downloading from URL: ' + link.url)
    if link is None:
        return False
    else:
-        link.download(
-            filepath='{0}{1}'.format(os.path.join(args.folder, file_name), args.input_ext))
+        filepath = '{0}{1}'.format(os.path.join(args.folder, file_name),
+                                   args.input_ext)
+        link.download(filepath=filepath)
        return True


 def check_exists(music_file, raw_song, meta_tags, islist=True):
-    """Check if the input song already exists in the given folder."""
+    """ Check if the input song already exists in the given folder. """
+    log.debug('Cleaning any temp files and checking '
+              'if "{}" already exists'.format(music_file))
    songs = os.listdir(args.folder)
    for song in songs:
        if song.endswith('.temp'):
            os.remove(os.path.join(args.folder, song))
            continue
        # check if any song with similar name is already present in the given folder
-        file_name = misc.sanitize_title(music_file)
+        file_name = internals.sanitize_title(music_file)
        if song.startswith(file_name):
-            # check if the already downloaded song has correct metadata
-            already_tagged = metadata.compare(os.path.join(args.folder, song), meta_tags)
+            log.debug('Found an already existing song: "{}"'.format(song))
+            if internals.is_spotify(raw_song):
+                # check if the already downloaded song has correct metadata
+                # if not, remove it and download again without prompt
+                already_tagged = metadata.compare(os.path.join(args.folder, song),
+                                                  meta_tags)
+                log.debug('Checking if it is already tagged correctly? {}',
+                                                            already_tagged)
+                if not already_tagged:
+                    os.remove(os.path.join(args.folder, song))
+                    return False

-            # if not, remove it and download again without prompt
-            if misc.is_spotify(raw_song) and not already_tagged:
-                os.remove(os.path.join(args.folder, song))
-                return False
-
-            # do not prompt and skip the current song
-            # if already downloaded when using list
-            if islist:
-                print('Song already exists')
-                return True
            # if downloading only single song, prompt to re-download
+            if islist:
+                log.warning('Song already exists')
+                return True
            else:
-                prompt = input('Song with same name has already been downloaded. '
-                               'Re-download? (y/n): ').lower()
-                if prompt == 'y':
+                log.info('Song with same name has already been downloaded. '
+                         'Re-download? (y/N): ')
+                prompt = input('> ')
+                if prompt.lower() == 'y':
                    os.remove(os.path.join(args.folder, song))
                    return False
                else:
@@ -286,7 +320,7 @@ def check_exists(music_file, raw_song, meta_tags, islist=True):


 def grab_list(text_file):
-    """Download all songs from the list."""
+    """ Download all songs from the list. """
    with open(text_file, 'r') as listed:
        lines = (listed.read()).splitlines()
    # ignore blank lines in text_file (if any)
@@ -294,37 +328,36 @@ def grab_list(text_file):
        lines.remove('')
    except ValueError:
        pass
-    print(u'Total songs in list: {0} songs'.format(len(lines)))
-    print('')
-    # nth input song
+    log.info(u'Preparing to download {} songs'.format(len(lines)))
    number = 1
+
    for raw_song in lines:
+        print('')
        try:
            grab_single(raw_song, number=number)
        # token expires after 1 hour
        except spotipy.client.SpotifyException:
            # refresh token when it expires
-            new_token = misc.generate_token()
+            log.debug('Token expired, generating new one and authorizing')
+            new_token = internals.generate_token()
            global spotify
            spotify = spotipy.Spotify(auth=new_token)
            grab_single(raw_song, number=number)
        # detect network problems
        except (urllib.request.URLError, TypeError, IOError):
            lines.append(raw_song)
-            # remove the downloaded song from .txt
-            misc.trim_song(text_file)
-            # and append it to the last line in .txt
+            # remove the downloaded song from file
+            internals.trim_song(text_file)
+            # and append it at the end of file
            with open(text_file, 'a') as myfile:
                myfile.write(raw_song + '\n')
-            print('Failed to download song. Will retry after other songs.')
+            log.warning('Failed to download song. Will retry after other songs\n')
            # wait 0.5 sec to avoid infinite looping
            time.sleep(0.5)
            continue
-        except KeyboardInterrupt:
-            misc.grace_quit()
-        finally:
-            print('')
-        misc.trim_song(text_file)
+
+        log.debug('Removing downloaded song from text file')
+        internals.trim_song(text_file)
        number += 1


@@ -340,14 +373,14 @@ def grab_playlist(playlist):
        username = splits[-3]
    except IndexError:
        # Wrong format, in either case
-        print('The provided playlist URL is not in a recognized format!')
+        log.error('The provided playlist URL is not in a recognized format!')
        sys.exit(10)
    playlist_id = splits[-1]
    try:
        write_playlist(username, playlist_id)
    except spotipy.client.SpotifyException:
-        print('Unable to find playlist')
-        print('Make sure the playlist is set to publicly visible and then try again')
+        log.error('Unable to find playlist')
+        log.info('Make sure the playlist is set to publicly visible and then try again')
        sys.exit(11)


@@ -366,74 +399,84 @@ def grab_album(album):


 def grab_single(raw_song, number=None):
-    """Logic behind downloading a song."""
+    """ Logic behind downloading a song. """
    if number:
        islist = True
    else:
        islist = False

-    if misc.is_youtube(raw_song):
+    if internals.is_youtube(raw_song):
+        log.debug('Input song is a YouTube URL')
+        content = go_pafy(raw_song, meta_tags=None)
        raw_song = slugify(content.title).replace('-', ' ')
+        meta_tags = generate_metadata(raw_song)
+    else:
+        meta_tags = generate_metadata(raw_song)
+        content = go_pafy(raw_song, meta_tags)

-    meta_tags = generate_metadata(raw_song)
-    content = go_pafy(raw_song, meta_tags)
    if content is None:
+        log.debug('Found no matching video')
        return

-    # print '[number]. [artist] - [song]' if downloading from list
-    # otherwise print '[artist] - [song]'
-    print(get_youtube_title(content, number))
+    # log '[number]. [artist] - [song]' if downloading from list
+    # otherwise log '[artist] - [song]'
+    log.info(get_youtube_title(content, number))
    # generate file name of the song to download
    songname = content.title

    if meta_tags is not None:
        refined_songname = generate_songname(meta_tags)
+        log.debug('Refining songname from "{0}" to "{1}"'.format(songname, refined_songname))
        if not refined_songname == ' - ':
            songname = refined_songname

-    file_name = misc.sanitize_title(songname)
+    file_name = internals.sanitize_title(songname)

    if not check_exists(file_name, raw_song, meta_tags, islist=islist):
        if download_song(file_name, content):
-            print('')
            input_song = file_name + args.input_ext
            output_song = file_name + args.output_ext
+            print('')
            convert.song(input_song, output_song, args.folder,
-                         avconv=args.avconv, verbose=args.verbose)
+                         avconv=args.avconv)
            if not args.input_ext == args.output_ext:
                os.remove(os.path.join(args.folder, input_song))

            if not args.no_metadata:
                metadata.embed(os.path.join(args.folder, output_song), meta_tags)
        else:
-            print('No audio streams available')
+            log.error('No audio streams available')


-class TestArgs(object):
-    manual = False
-    input_ext = '.m4a'
-    output_ext = '.mp3'
-    folder = 'Music/'
-
 # token is mandatory when using Spotify's API
 # https://developer.spotify.com/news-stories/2017/01/27/removing-unauthenticated-calls-to-the-web-api/
-token = misc.generate_token()
+token = internals.generate_token()
 spotify = spotipy.Spotify(auth=token)

 if __name__ == '__main__':
-    args = misc.get_arguments()
-    misc.filter_path(args.folder)
+    args = internals.get_arguments()
+    internals.filter_path(args.folder)

-    if args.song:
-        grab_single(raw_song=args.song)
-    elif args.list:
-        grab_list(text_file=args.list)
-    elif args.playlist:
-        grab_playlist(playlist=args.playlist)
-    elif args.album:
-        grab_album(album=args.album)
-    elif args.username:
-        feed_playlist(username=args.username)
-else:
-    misc.filter_path('Music')
-    args = TestArgs()
+    logger.log = logger.logzero.setup_logger(formatter=logger.formatter,
+                                      level=args.log_level)
+    log = logger.log
+    log.debug('Python version: {}'.format(sys.version))
+    log.debug('Platform: {}'.format(platform.platform()))
+    log.debug(pprint.pformat(args.__dict__))
+
+    try:
+        if args.song:
+            grab_single(raw_song=args.song)
+        elif args.list:
+            grab_list(text_file=args.list)
+        elif args.playlist:
+            grab_playlist(playlist=args.playlist)
+        elif args.album:
+            grab_album(album=args.album)
+        elif args.username:
+            feed_playlist(username=args.username)
+        sys.exit(0)
+
+    except KeyboardInterrupt as e:
+        log.exception(e)
+        sys.exit(-1)