From 58cfa121f3353e338c74bbf67703ce8c0f45a4ca Mon Sep 17 00:00:00 2001 From: Ritiek Malhotra Date: Thu, 11 Jan 2018 02:13:23 +0530 Subject: [PATCH] Write lyrics to track metadata (#194) * TODO: Don't throw traceback if no lyrics found * Add more metadata fields * Refactor debug logging * Fix traceback when lyrics not found * It already vomits metadata :3 * Bump lyricwikia >= 0.1.8 --- README.md | 3 ++- core/metadata.py | 62 +++++++++++++++++++++++++++++++++--------------- requirements.txt | 1 + spotdl.py | 28 +++++++++++++++------- 4 files changed, 66 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 7339fad..250f668 100755 --- a/README.md +++ b/README.md @@ -7,12 +7,13 @@ - Can also download a song by entering its artist and song name (in case if you don't have the Spotify's HTTP link for some song). -- Automatically fixes song's meta-tags which include: +- Automatically applies metadata to the downloaded song which include: - Title - Artist - Album - Album art + - Lyrics (if found on http://lyrics.wikia.com) - Album artist - Genre - Track number diff --git a/core/metadata.py b/core/metadata.py index 969f2e0..d8f02ce 100755 --- a/core/metadata.py +++ b/core/metadata.py @@ -1,5 +1,5 @@ from mutagen.easyid3 import EasyID3 -from mutagen.id3 import ID3, APIC +from mutagen.id3 import ID3, TORY, TYER, TPUB, APIC, USLT, COMM from mutagen.mp4 import MP4, MP4Cover from core.logger import log @@ -43,6 +43,9 @@ def embed(music_file, meta_tags): def embed_mp3(music_file, meta_tags): """ Embed metadata to MP3 files. """ # EasyID3 is fun to use ;) + # For supported easyid3 tags: + # https://github.com/quodlibet/mutagen/blob/master/mutagen/easyid3.py + # Check out somewhere at end of above linked file audiofile = EasyID3(music_file) audiofile['artist'] = meta_tags['artists'][0]['name'] audiofile['albumartist'] = meta_tags['artists'][0]['name'] @@ -59,7 +62,8 @@ def embed_mp3(music_file, meta_tags): audiofile['arranger'] = meta_tags['artists'][0]['name'] audiofile['performer'] = meta_tags['artists'][0]['name'] audiofile['website'] = meta_tags['external_urls']['spotify'] - audiofile['length'] = str(meta_tags['duration_ms'] / 1000) + audiofile['length'] = str(meta_tags['duration_ms'] / 1000.0) + if meta_tags['publisher']: audiofile['encodedby'] = meta_tags['publisher'] if meta_tags['genre']: @@ -69,14 +73,27 @@ def embed_mp3(music_file, meta_tags): if meta_tags['external_ids']['isrc']: audiofile['isrc'] = meta_tags['external_ids']['isrc'] audiofile.save(v2_version=3) + + # For supported id3 tags: + # https://github.com/quodlibet/mutagen/blob/master/mutagen/id3/_frames.py + # Each class represents an id3 tag audiofile = ID3(music_file) + print(meta_tags['release_date'].split('-')[0]) + year, *_ = meta_tags['release_date'].split('-') + audiofile['TORY'] = TORY(encoding=3, text=year) + audiofile['TYER'] = TYER(encoding=3, text=year) + audiofile['TPUB'] = TPUB(encoding=3, text=meta_tags['publisher']) + audiofile['COMM'] = COMM(encoding=3, text=meta_tags['external_urls']['spotify']) + if meta_tags['lyrics']: + audiofile['USLT'] = USLT(encoding=3, desc=u'Lyrics', text=meta_tags['lyrics']) try: albumart = urllib.request.urlopen(meta_tags['album']['images'][0]['url']) - audiofile["APIC"] = APIC(encoding=3, mime='image/jpeg', type=3, + audiofile['APIC'] = APIC(encoding=3, mime='image/jpeg', type=3, desc=u'Cover', data=albumart.read()) albumart.close() except IndexError: pass + audiofile.save(v2_version=3) return True @@ -85,22 +102,24 @@ def embed_m4a(music_file, meta_tags): """ Embed metadata to M4A files. """ # Apple has specific tags - see mutagen docs - # http://mutagen.readthedocs.io/en/latest/api/mp4.html - tags = {'album': '\xa9alb', - 'artist': '\xa9ART', - 'date': '\xa9day', - 'title': '\xa9nam', - 'originaldate': 'purd', - 'comment': '\xa9cmt', - 'group': '\xa9grp', - 'writer': '\xa9wrt', - 'genre': '\xa9gen', - 'tracknumber': 'trkn', - 'albumartist': 'aART', - 'disknumber': 'disk', - 'cpil': 'cpil', - 'albumart': 'covr', - 'copyright': 'cprt', - 'tempo': 'tmpo'} + tags = { 'album' : '\xa9alb', + 'artist' : '\xa9ART', + 'date' : '\xa9day', + 'title' : '\xa9nam', + 'year' : '\xa9day', + 'originaldate' : 'purd', + 'comment' : '\xa9cmt', + 'group' : '\xa9grp', + 'writer' : '\xa9wrt', + 'genre' : '\xa9gen', + 'tracknumber' : 'trkn', + 'albumartist' : 'aART', + 'disknumber' : 'disk', + 'cpil' : 'cpil', + 'albumart' : 'covr', + 'copyright' : 'cprt', + 'tempo' : 'tmpo', + 'lyrics' : '\xa9lyr' } audiofile = MP4(music_file) audiofile[tags['artist']] = meta_tags['artists'][0]['name'] @@ -111,11 +130,16 @@ def embed_m4a(music_file, meta_tags): meta_tags['total_tracks'])] audiofile[tags['disknumber']] = [(meta_tags['disc_number'], 0)] audiofile[tags['date']] = meta_tags['release_date'] + year, *_ = meta_tags['release_date'].split('-') + audiofile[tags['year']] = year audiofile[tags['originaldate']] = meta_tags['release_date'] + audiofile[tags['comment']] = meta_tags['external_urls']['spotify'] if meta_tags['genre']: audiofile[tags['genre']] = meta_tags['genre'] if meta_tags['copyright']: audiofile[tags['copyright']] = meta_tags['copyright'] + if meta_tags['lyrics']: + audiofile[tags['lyrics']] = meta_tags['lyrics'] try: albumart = urllib.request.urlopen(meta_tags['album']['images'][0]['url']) audiofile[tags['albumart']] = [MP4Cover( diff --git a/requirements.txt b/requirements.txt index 48b4b2c..540740e 100755 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,4 @@ mutagen >= 1.37 unicode-slugify >= 0.1.3 titlecase >= 0.10.0 logzero >= 1.3.1 +lyricwikia >= 0.1.8 diff --git a/spotdl.py b/spotdl.py index 5cff423..f678805 100755 --- a/spotdl.py +++ b/spotdl.py @@ -9,6 +9,7 @@ from titlecase import titlecase from slugify import slugify import spotipy import pafy +import lyricwikia import urllib.request import os import sys @@ -57,6 +58,19 @@ def generate_metadata(raw_song): meta_tags[u'publisher'] = album['label'] meta_tags[u'total_tracks'] = album['tracks']['total'] + log.debug('Fetching lyrics') + + try: + meta_tags['lyrics'] = lyricwikia.get_lyrics( + meta_tags['artists'][0]['name'], + meta_tags['name']) + except lyricwikia.LyricsNotFound: + meta_tags['lyrics'] = None + + # remove unused clutter when debug meta_tags + del meta_tags['available_markets'] + del meta_tags['album']['available_markets'] + log.debug(pprint.pformat(meta_tags)) return meta_tags @@ -99,15 +113,15 @@ def generate_youtube_url(raw_song, meta_tags, tries_remaining=5): else: song = generate_songname(meta_tags) query['q'] = song - log.debug('Query: {0}'.format(query)) + log.debug('query: {0}'.format(query)) data = pafy.call_gdata('search', query) - query2 = {'part': 'contentDetails,snippet,statistics', + query_results = {'part': 'contentDetails,snippet,statistics', 'maxResults': 50, 'id': ','.join(i['id']['videoId'] for i in data['items'])} - log.debug('Query2: {0}'.format(query2)) + log.debug('query_results: {0}'.format(query_results)) - vdata = pafy.call_gdata('videos', query2) + vdata = pafy.call_gdata('videos', query_results) videos = [] for x in vdata['items']: @@ -122,8 +136,6 @@ def generate_youtube_url(raw_song, meta_tags, tries_remaining=5): if not videos: return None - log.debug(pprint.pformat(videos)) - if args.manual: log.info(song) log.info('0. Skip downloading this song.\n') @@ -491,8 +503,8 @@ if __name__ == '__main__': elif args.username: feed_playlist(username=args.username) - # Actually we don't necessarily need this, but yeah... - # Explicit is better than implicit! + # actually we don't necessarily need this, but yeah... + # explicit is better than implicit! sys.exit(0) except KeyboardInterrupt as e: