Write lyrics to track metadata (#194)

* TODO: Don't throw traceback if no lyrics found

* Add more metadata fields

* Refactor debug logging

* Fix traceback when lyrics not found

* It already vomits metadata :3

* Bump lyricwikia >= 0.1.8
This commit is contained in:
Ritiek Malhotra
2018-01-11 02:13:23 +05:30
committed by GitHub
parent fb70ad32bb
commit 58cfa121f3
4 changed files with 66 additions and 28 deletions

View File

@@ -7,12 +7,13 @@
- Can also download a song by entering its artist and song name (in case you don't have the Spotify HTTP link for the song). - Can also download a song by entering its artist and song name (in case you don't have the Spotify HTTP link for the song).
- Automatically fixes the song's meta-tags, which include: - Automatically applies metadata to the downloaded song, which includes:
- Title - Title
- Artist - Artist
- Album - Album
- Album art - Album art
- Lyrics (if found on http://lyrics.wikia.com)
- Album artist - Album artist
- Genre - Genre
- Track number - Track number

View File

@@ -1,5 +1,5 @@
from mutagen.easyid3 import EasyID3 from mutagen.easyid3 import EasyID3
from mutagen.id3 import ID3, APIC from mutagen.id3 import ID3, TORY, TYER, TPUB, APIC, USLT, COMM
from mutagen.mp4 import MP4, MP4Cover from mutagen.mp4 import MP4, MP4Cover
from core.logger import log from core.logger import log
@@ -43,6 +43,9 @@ def embed(music_file, meta_tags):
def embed_mp3(music_file, meta_tags): def embed_mp3(music_file, meta_tags):
""" Embed metadata to MP3 files. """ """ Embed metadata to MP3 files. """
# EasyID3 is fun to use ;) # EasyID3 is fun to use ;)
# For supported easyid3 tags:
# https://github.com/quodlibet/mutagen/blob/master/mutagen/easyid3.py
# Check out somewhere at end of above linked file
audiofile = EasyID3(music_file) audiofile = EasyID3(music_file)
audiofile['artist'] = meta_tags['artists'][0]['name'] audiofile['artist'] = meta_tags['artists'][0]['name']
audiofile['albumartist'] = meta_tags['artists'][0]['name'] audiofile['albumartist'] = meta_tags['artists'][0]['name']
@@ -59,7 +62,8 @@ def embed_mp3(music_file, meta_tags):
audiofile['arranger'] = meta_tags['artists'][0]['name'] audiofile['arranger'] = meta_tags['artists'][0]['name']
audiofile['performer'] = meta_tags['artists'][0]['name'] audiofile['performer'] = meta_tags['artists'][0]['name']
audiofile['website'] = meta_tags['external_urls']['spotify'] audiofile['website'] = meta_tags['external_urls']['spotify']
audiofile['length'] = str(meta_tags['duration_ms'] / 1000) audiofile['length'] = str(meta_tags['duration_ms'] / 1000.0)
if meta_tags['publisher']: if meta_tags['publisher']:
audiofile['encodedby'] = meta_tags['publisher'] audiofile['encodedby'] = meta_tags['publisher']
if meta_tags['genre']: if meta_tags['genre']:
@@ -69,14 +73,27 @@ def embed_mp3(music_file, meta_tags):
if meta_tags['external_ids']['isrc']: if meta_tags['external_ids']['isrc']:
audiofile['isrc'] = meta_tags['external_ids']['isrc'] audiofile['isrc'] = meta_tags['external_ids']['isrc']
audiofile.save(v2_version=3) audiofile.save(v2_version=3)
# For supported id3 tags:
# https://github.com/quodlibet/mutagen/blob/master/mutagen/id3/_frames.py
# Each class represents an id3 tag
audiofile = ID3(music_file) audiofile = ID3(music_file)
print(meta_tags['release_date'].split('-')[0])
year, *_ = meta_tags['release_date'].split('-')
audiofile['TORY'] = TORY(encoding=3, text=year)
audiofile['TYER'] = TYER(encoding=3, text=year)
audiofile['TPUB'] = TPUB(encoding=3, text=meta_tags['publisher'])
audiofile['COMM'] = COMM(encoding=3, text=meta_tags['external_urls']['spotify'])
if meta_tags['lyrics']:
audiofile['USLT'] = USLT(encoding=3, desc=u'Lyrics', text=meta_tags['lyrics'])
try: try:
albumart = urllib.request.urlopen(meta_tags['album']['images'][0]['url']) albumart = urllib.request.urlopen(meta_tags['album']['images'][0]['url'])
audiofile["APIC"] = APIC(encoding=3, mime='image/jpeg', type=3, audiofile['APIC'] = APIC(encoding=3, mime='image/jpeg', type=3,
desc=u'Cover', data=albumart.read()) desc=u'Cover', data=albumart.read())
albumart.close() albumart.close()
except IndexError: except IndexError:
pass pass
audiofile.save(v2_version=3) audiofile.save(v2_version=3)
return True return True
@@ -85,22 +102,24 @@ def embed_m4a(music_file, meta_tags):
""" Embed metadata to M4A files. """ """ Embed metadata to M4A files. """
# Apple has specific tags - see mutagen docs - # Apple has specific tags - see mutagen docs -
# http://mutagen.readthedocs.io/en/latest/api/mp4.html # http://mutagen.readthedocs.io/en/latest/api/mp4.html
tags = {'album': '\xa9alb', tags = { 'album' : '\xa9alb',
'artist': '\xa9ART', 'artist' : '\xa9ART',
'date': '\xa9day', 'date' : '\xa9day',
'title': '\xa9nam', 'title' : '\xa9nam',
'originaldate': 'purd', 'year' : '\xa9day',
'comment': '\xa9cmt', 'originaldate' : 'purd',
'group': '\xa9grp', 'comment' : '\xa9cmt',
'writer': '\xa9wrt', 'group' : '\xa9grp',
'genre': '\xa9gen', 'writer' : '\xa9wrt',
'tracknumber': 'trkn', 'genre' : '\xa9gen',
'albumartist': 'aART', 'tracknumber' : 'trkn',
'disknumber': 'disk', 'albumartist' : 'aART',
'cpil': 'cpil', 'disknumber' : 'disk',
'albumart': 'covr', 'cpil' : 'cpil',
'copyright': 'cprt', 'albumart' : 'covr',
'tempo': 'tmpo'} 'copyright' : 'cprt',
'tempo' : 'tmpo',
'lyrics' : '\xa9lyr' }
audiofile = MP4(music_file) audiofile = MP4(music_file)
audiofile[tags['artist']] = meta_tags['artists'][0]['name'] audiofile[tags['artist']] = meta_tags['artists'][0]['name']
@@ -111,11 +130,16 @@ def embed_m4a(music_file, meta_tags):
meta_tags['total_tracks'])] meta_tags['total_tracks'])]
audiofile[tags['disknumber']] = [(meta_tags['disc_number'], 0)] audiofile[tags['disknumber']] = [(meta_tags['disc_number'], 0)]
audiofile[tags['date']] = meta_tags['release_date'] audiofile[tags['date']] = meta_tags['release_date']
year, *_ = meta_tags['release_date'].split('-')
audiofile[tags['year']] = year
audiofile[tags['originaldate']] = meta_tags['release_date'] audiofile[tags['originaldate']] = meta_tags['release_date']
audiofile[tags['comment']] = meta_tags['external_urls']['spotify']
if meta_tags['genre']: if meta_tags['genre']:
audiofile[tags['genre']] = meta_tags['genre'] audiofile[tags['genre']] = meta_tags['genre']
if meta_tags['copyright']: if meta_tags['copyright']:
audiofile[tags['copyright']] = meta_tags['copyright'] audiofile[tags['copyright']] = meta_tags['copyright']
if meta_tags['lyrics']:
audiofile[tags['lyrics']] = meta_tags['lyrics']
try: try:
albumart = urllib.request.urlopen(meta_tags['album']['images'][0]['url']) albumart = urllib.request.urlopen(meta_tags['album']['images'][0]['url'])
audiofile[tags['albumart']] = [MP4Cover( audiofile[tags['albumart']] = [MP4Cover(

View File

@@ -6,3 +6,4 @@ mutagen >= 1.37
unicode-slugify >= 0.1.3 unicode-slugify >= 0.1.3
titlecase >= 0.10.0 titlecase >= 0.10.0
logzero >= 1.3.1 logzero >= 1.3.1
lyricwikia >= 0.1.8

View File

@@ -9,6 +9,7 @@ from titlecase import titlecase
from slugify import slugify from slugify import slugify
import spotipy import spotipy
import pafy import pafy
import lyricwikia
import urllib.request import urllib.request
import os import os
import sys import sys
@@ -57,6 +58,19 @@ def generate_metadata(raw_song):
meta_tags[u'publisher'] = album['label'] meta_tags[u'publisher'] = album['label']
meta_tags[u'total_tracks'] = album['tracks']['total'] meta_tags[u'total_tracks'] = album['tracks']['total']
log.debug('Fetching lyrics')
try:
meta_tags['lyrics'] = lyricwikia.get_lyrics(
meta_tags['artists'][0]['name'],
meta_tags['name'])
except lyricwikia.LyricsNotFound:
meta_tags['lyrics'] = None
# remove unused clutter when debug meta_tags
del meta_tags['available_markets']
del meta_tags['album']['available_markets']
log.debug(pprint.pformat(meta_tags)) log.debug(pprint.pformat(meta_tags))
return meta_tags return meta_tags
@@ -99,15 +113,15 @@ def generate_youtube_url(raw_song, meta_tags, tries_remaining=5):
else: else:
song = generate_songname(meta_tags) song = generate_songname(meta_tags)
query['q'] = song query['q'] = song
log.debug('Query: {0}'.format(query)) log.debug('query: {0}'.format(query))
data = pafy.call_gdata('search', query) data = pafy.call_gdata('search', query)
query2 = {'part': 'contentDetails,snippet,statistics', query_results = {'part': 'contentDetails,snippet,statistics',
'maxResults': 50, 'maxResults': 50,
'id': ','.join(i['id']['videoId'] for i in data['items'])} 'id': ','.join(i['id']['videoId'] for i in data['items'])}
log.debug('Query2: {0}'.format(query2)) log.debug('query_results: {0}'.format(query_results))
vdata = pafy.call_gdata('videos', query2) vdata = pafy.call_gdata('videos', query_results)
videos = [] videos = []
for x in vdata['items']: for x in vdata['items']:
@@ -122,8 +136,6 @@ def generate_youtube_url(raw_song, meta_tags, tries_remaining=5):
if not videos: if not videos:
return None return None
log.debug(pprint.pformat(videos))
if args.manual: if args.manual:
log.info(song) log.info(song)
log.info('0. Skip downloading this song.\n') log.info('0. Skip downloading this song.\n')
@@ -491,8 +503,8 @@ if __name__ == '__main__':
elif args.username: elif args.username:
feed_playlist(username=args.username) feed_playlist(username=args.username)
# Actually we don't necessarily need this, but yeah... # actually we don't necessarily need this, but yeah...
# Explicit is better than implicit! # explicit is better than implicit!
sys.exit(0) sys.exit(0)
except KeyboardInterrupt as e: except KeyboardInterrupt as e: