mirror of
https://github.com/KevinMidboe/spotify-downloader.git
synced 2025-10-29 18:00:15 +00:00
Create youtube_tools.py
This commit is contained in:
68
core/arguments.py
Normal file
68
core/arguments.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
|
||||
from core.logger import log_leveller, _LOG_LEVELS_STR
|
||||
|
||||
|
||||
def get_arguments(raw_args=None):
    """ Build the command-line parser and return the parsed arguments.

    Exactly one of --song, --list, --playlist, --album or --username has
    to be given; they form a required mutually exclusive group.

    raw_args -- optional list of argument strings to parse. When left as
                None, argparse falls back to sys.argv[1:], which is what
                this function always did before the parameter existed.

    Returns the argparse namespace. Its log_level attribute is replaced by
    whatever log_leveller() returns for the upper-cased level name.
    """
    parser = argparse.ArgumentParser(
        description='Download and convert songs from Spotify, Youtube etc.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    group = parser.add_mutually_exclusive_group(required=True)

    group.add_argument(
        '-s', '--song', help='download song by spotify link or name')
    group.add_argument(
        '-l', '--list', help='download songs from a file')
    group.add_argument(
        '-p', '--playlist',
        help='load songs from playlist URL into <playlist_name>.txt')
    group.add_argument(
        '-b', '--album',
        help='load songs from album URL into <album_name>.txt')
    group.add_argument(
        '-u', '--username',
        help="load songs from user's playlist into <playlist_name>.txt")

    parser.add_argument(
        '-m', '--manual', default=False,
        help='choose the song to download manually', action='store_true')
    parser.add_argument(
        '-nm', '--no-metadata', default=False,
        help='do not embed metadata in songs', action='store_true')
    parser.add_argument(
        '-a', '--avconv', default=False,
        help='Use avconv for conversion otherwise set defaults to ffmpeg',
        action='store_true')
    parser.add_argument(
        '-f', '--folder', default=(os.path.join(sys.path[0], 'Music')),
        help='path to folder where files will be stored in')
    parser.add_argument(
        '--overwrite', default='prompt',
        help='change the overwrite policy',
        # a tuple (not a set) keeps the order shown in --help and in
        # "invalid choice" errors stable from run to run
        choices=('prompt', 'force', 'skip'))
    parser.add_argument(
        '-i', '--input-ext', default='.m4a',
        help='prefered input format .m4a or .webm (Opus)')
    parser.add_argument(
        '-o', '--output-ext', default='.mp3',
        help='prefered output extension .mp3 or .m4a (AAC)')
    parser.add_argument(
        '-d', '--dry-run', default=False,
        help='Show only track title and YouTube URL',
        action='store_true')
    parser.add_argument(
        '-mo', '--music-videos-only', default=False,
        help='Search only for music on Youtube',
        action='store_true')
    parser.add_argument(
        '-ll', '--log-level', default='INFO',
        choices=_LOG_LEVELS_STR,
        # upper-case the value before it is checked against the choices
        type=str.upper,
        help='set log verbosity')

    parsed = parser.parse_args(raw_args)
    parsed.log_level = log_leveller(parsed.log_level)

    return parsed
|
||||
|
||||
|
||||
# Parsed once, at import time; other modules read the result as `arguments.parsed`.
parsed = get_arguments()
|
||||
@@ -1,11 +1,7 @@
|
||||
import argparse
|
||||
import spotipy.oauth2 as oauth2
|
||||
from urllib.request import quote
|
||||
from slugify import SLUG_OK, slugify
|
||||
from core.logger import log
|
||||
|
||||
import sys
|
||||
import os
|
||||
from core.logger import log, log_leveller, _LOG_LEVELS_STR
|
||||
|
||||
|
||||
def input_link(links):
|
||||
@@ -24,74 +20,14 @@ def input_link(links):
|
||||
log.warning('Choose a valid number!')
|
||||
|
||||
|
||||
def trim_song(text_file):
    """ Remove the first song from file.

    text_file -- path of the text file to rewrite in place.

    The whole file is read, then written back without its first line.
    An empty file is left empty.
    """
    with open(text_file, 'r') as file_in:
        # keepends=True so the surviving lines keep their own line endings
        data = file_in.read().splitlines(True)

    with open(text_file, 'w') as file_out:
        file_out.writelines(data[1:])
|
||||
|
||||
|
||||
def get_arguments():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Download and convert songs from Spotify, Youtube etc.',
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||
group = parser.add_mutually_exclusive_group(required=True)
|
||||
|
||||
group.add_argument(
|
||||
'-s', '--song', help='download song by spotify link or name')
|
||||
group.add_argument(
|
||||
'-l', '--list', help='download songs from a file')
|
||||
group.add_argument(
|
||||
'-p', '--playlist', help='load songs from playlist URL into <playlist_name>.txt')
|
||||
group.add_argument(
|
||||
'-b', '--album', help='load songs from album URL into <album_name>.txt')
|
||||
group.add_argument(
|
||||
'-u', '--username',
|
||||
help="load songs from user's playlist into <playlist_name>.txt")
|
||||
parser.add_argument(
|
||||
'-m', '--manual', default=False,
|
||||
help='choose the song to download manually', action='store_true')
|
||||
parser.add_argument(
|
||||
'-nm', '--no-metadata', default=False,
|
||||
help='do not embed metadata in songs', action='store_true')
|
||||
parser.add_argument(
|
||||
'-a', '--avconv', default=False,
|
||||
help='Use avconv for conversion otherwise set defaults to ffmpeg',
|
||||
action='store_true')
|
||||
parser.add_argument(
|
||||
'-f', '--folder', default=(os.path.join(sys.path[0], 'Music')),
|
||||
help='path to folder where files will be stored in')
|
||||
parser.add_argument(
|
||||
'--overwrite', default='prompt',
|
||||
help='change the overwrite policy',
|
||||
choices={'prompt', 'force', 'skip'})
|
||||
parser.add_argument(
|
||||
'-i', '--input-ext', default='.m4a',
|
||||
help='prefered input format .m4a or .webm (Opus)')
|
||||
parser.add_argument(
|
||||
'-o', '--output-ext', default='.mp3',
|
||||
help='prefered output extension .mp3 or .m4a (AAC)')
|
||||
parser.add_argument(
|
||||
'-d', '--dry-run', default=False,
|
||||
help='Show only track title and YouTube URL',
|
||||
action='store_true')
|
||||
parser.add_argument(
|
||||
'-mo', '--music-videos-only', default=False,
|
||||
help='Search only for music on Youtube',
|
||||
action='store_true')
|
||||
parser.add_argument(
|
||||
'-ll', '--log-level', default='INFO',
|
||||
choices=_LOG_LEVELS_STR,
|
||||
type=str.upper,
|
||||
help='set log verbosity')
|
||||
|
||||
parsed = parser.parse_args()
|
||||
parsed.log_level = log_leveller(parsed.log_level)
|
||||
|
||||
return parsed
|
||||
|
||||
|
||||
def is_spotify(raw_song):
|
||||
""" Check if the input song is a Spotify link. """
|
||||
status = len(raw_song) == 22 and raw_song.replace(" ", "%20") == raw_song
|
||||
@@ -107,6 +43,12 @@ def is_youtube(raw_song):
|
||||
return status
|
||||
|
||||
|
||||
def generate_songname(tags):
    """ Generate a string of the format '[artist] - [song]' for the given spotify song.

    tags -- mapping with a 'name' entry and an 'artists' list whose first
            element has a 'name' entry; only the first artist is used.
    """
    raw_song = u'{0} - {1}'.format(tags['artists'][0]['name'], tags['name'])
    return raw_song
|
||||
|
||||
|
||||
def sanitize_title(title):
|
||||
""" Generate filename of the song to be downloaded. """
|
||||
title = title.replace(' ', '_')
|
||||
|
||||
142
core/youtube_tools.py
Normal file
142
core/youtube_tools.py
Normal file
@@ -0,0 +1,142 @@
|
||||
import pafy
|
||||
|
||||
from core import internals
|
||||
from core import arguments
|
||||
from core.logger import log
|
||||
|
||||
import os
|
||||
import pprint
|
||||
|
||||
# Shared CLI namespace: the module-level `parsed` value of core.arguments.
args = arguments.parsed
|
||||
|
||||
|
||||
def go_pafy(raw_song, meta_tags=None):
    """ Parse track from YouTube.

    raw_song  -- either something internals.is_youtube() accepts, which is
                 handed to pafy.new() directly, or a search term
    meta_tags -- optional metadata forwarded to generate_youtube_url()

    Returns the object created by pafy.new(), or None when no URL could
    be generated for the song.
    """
    if internals.is_youtube(raw_song):
        return pafy.new(raw_song)

    track_url = generate_youtube_url(raw_song, meta_tags)
    if not track_url:
        return None

    return pafy.new(track_url)
|
||||
|
||||
|
||||
def get_youtube_title(content, number=None):
    """ Get the YouTube video's title.

    content -- object exposing a `title` attribute
    number  -- optional position to prefix as '<number>. <title>'

    The prefix is added only for a truthy number, so both None and 0
    yield the bare title.
    """
    title = content.title
    if number:
        return '{0}. {1}'.format(number, title)
    return title
|
||||
|
||||
|
||||
def download_song(file_name, content):
    """ Download the audio file from YouTube.

    file_name -- base name (without extension) of the file to create
                 inside args.folder
    content   -- object providing getbestaudio(); presumably the value
                 returned by go_pafy() -- TODO confirm against callers

    Returns True after the download call has returned, and False when
    args.input_ext is neither '.webm' nor '.m4a' or when getbestaudio()
    yields nothing.
    """
    if args.input_ext not in (".webm", ".m4a"):
        return False

    # the preferred type is passed without the leading dot of the extension
    link = content.getbestaudio(preftype=args.input_ext[1:])
    if not link:
        return False

    log.debug('Downloading from URL: ' + link.url)
    filepath = '{0}{1}'.format(os.path.join(args.folder, file_name),
                               args.input_ext)
    log.debug('Saving to: ' + filepath)
    link.download(filepath=filepath)
    return True
|
||||
|
||||
|
||||
def generate_youtube_url(raw_song, meta_tags, tries_remaining=5):
    """ Search for the song on YouTube and generate a URL to its video.

    raw_song        -- search term used when no metadata is available
    meta_tags       -- Spotify metadata mapping or a falsy value; when
                       given, the search term is built from it and its
                       'duration_ms' entry is used to pick a video
    tries_remaining -- the function gives up immediately when this is 0

    Returns a 'http://youtube.com/watch?v=...' URL, or None when nothing
    suitable was found or the user chose to skip the song.
    """
    # prevents an infinite loop but allows for a few retries
    if tries_remaining == 0:
        log.debug('No tries left. I quit.')
        return None

    query = {'part': 'snippet',
             'maxResults': 50,
             'type': 'video'}

    if args.music_videos_only:
        query['videoCategoryId'] = '10'

    song = internals.generate_songname(meta_tags) if meta_tags else raw_song
    query['q'] = song
    log.debug('Query: {0}'.format(query))

    data = pafy.call_gdata('search', query)
    video_ids = [item['id']['videoId'] for item in data['items']]
    if not video_ids:
        # no search hits: do not issue a 'videos' request with an empty id
        return None

    query2 = {'part': 'contentDetails,snippet,statistics',
              'maxResults': 50,
              'id': ','.join(video_ids)}
    log.debug('Query2: {0}'.format(query2))

    vdata = pafy.call_gdata('videos', query2)

    videos = []
    for item in vdata['items']:
        duration_s = pafy.playlist.parseISO8591(item['contentDetails']['duration'])
        videos.append({'link': item['id'],
                       'title': item['snippet']['title'],
                       'videotime': internals.videotime_from_seconds(duration_s),
                       'seconds': duration_s})
        # NOTE(review): without metadata only the first video is kept, which
        # also leaves a single option in manual mode -- confirm this is wanted
        if not meta_tags:
            break

    if not videos:
        return None

    log.debug(pprint.pformat(videos))

    if args.manual:
        log.info(song)
        log.info('0. Skip downloading this song.\n')
        # fetch all video links on first page on YouTube
        for position, video in enumerate(videos, 1):
            log.info(u'{0}. {1} {2} {3}'.format(
                position, video['title'], video['videotime'],
                "http://youtube.com/watch?v=" + video['link']))
        # let user select the song to download
        result = internals.input_link(videos)
        if not result:
            return None
    elif not meta_tags:
        # if the metadata could not be acquired, take the first result
        # from Youtube because the proper song length is unknown
        result = videos[0]
        log.debug('Since no metadata found on Spotify, going with the first result')
    else:
        # filter out videos that do not have a similar length to the Spotify song
        duration_tolerance = 10
        max_duration_tolerance = 20
        song_seconds = int(meta_tags['duration_ms']) / 1000

        # start with a reasonable tolerance and widen it one second at a
        # time until a video fits or the maximum has been tried; a match
        # found at exactly max_duration_tolerance is accepted
        for tolerance in range(duration_tolerance, max_duration_tolerance + 1):
            possible_videos_by_duration = [
                video for video in videos
                if abs(video['seconds'] - song_seconds) <= tolerance]
            if possible_videos_by_duration:
                break
        else:
            log.error("{0} by {1} was not found.\n".format(
                meta_tags['name'], meta_tags['artists'][0]['name']))
            return None

        result = possible_videos_by_duration[0]

    if result:
        url = "http://youtube.com/watch?v=" + result['link']
    else:
        url = None

    return url
|
||||
170
spotdl.py
170
spotdl.py
@@ -6,6 +6,8 @@ from core import metadata
|
||||
from core import convert
|
||||
from core import internals
|
||||
from core import spotify_tools
|
||||
from core import youtube_tools
|
||||
from core import arguments
|
||||
from slugify import slugify
|
||||
import spotipy
|
||||
import pafy
|
||||
@@ -18,162 +20,6 @@ import platform
|
||||
import pprint
|
||||
|
||||
|
||||
def generate_songname(tags):
|
||||
""" Generate a string of the format '[artist] - [song]' for the given spotify song. """
|
||||
raw_song = u'{0} - {1}'.format(tags['artists'][0]['name'], tags['name'])
|
||||
return raw_song
|
||||
|
||||
|
||||
def is_video(result):
|
||||
# ensure result is not a channel
|
||||
not_video = result.find('channel') is not None or \
|
||||
'yt-lockup-channel' in result.parent.attrs['class'] or \
|
||||
'yt-lockup-channel' in result.attrs['class']
|
||||
|
||||
# ensure result is not a mix/playlist
|
||||
not_video = not_video or \
|
||||
'yt-lockup-playlist' in result.parent.attrs['class']
|
||||
|
||||
# ensure video result is not an advertisement
|
||||
not_video = not_video or \
|
||||
result.find('googleads') is not None
|
||||
|
||||
video = not not_video
|
||||
return video
|
||||
|
||||
|
||||
def generate_youtube_url(raw_song, meta_tags, tries_remaining=5):
|
||||
""" Search for the song on YouTube and generate a URL to its video. """
|
||||
# prevents an infinite loop but allows for a few retries
|
||||
if tries_remaining == 0:
|
||||
log.debug('No tries left. I quit.')
|
||||
return
|
||||
|
||||
query = {'part': 'snippet',
|
||||
'maxResults': 50,
|
||||
'type': 'video'}
|
||||
|
||||
if args.music_videos_only:
|
||||
query['videoCategoryId'] = '10'
|
||||
|
||||
if not meta_tags:
|
||||
song = raw_song
|
||||
query['q'] = song
|
||||
else:
|
||||
song = generate_songname(meta_tags)
|
||||
query['q'] = song
|
||||
log.debug('Query: {0}'.format(query))
|
||||
|
||||
data = pafy.call_gdata('search', query)
|
||||
query2 = {'part': 'contentDetails,snippet,statistics',
|
||||
'maxResults': 50,
|
||||
'id': ','.join(i['id']['videoId'] for i in data['items'])}
|
||||
log.debug('Query2: {0}'.format(query2))
|
||||
|
||||
vdata = pafy.call_gdata('videos', query2)
|
||||
|
||||
videos = []
|
||||
for x in vdata['items']:
|
||||
duration_s = pafy.playlist.parseISO8591(x['contentDetails']['duration'])
|
||||
youtubedetails = {'link': x['id'], 'title': x['snippet']['title'],
|
||||
'videotime':internals.videotime_from_seconds(duration_s),
|
||||
'seconds': duration_s}
|
||||
videos.append(youtubedetails)
|
||||
if not meta_tags:
|
||||
break
|
||||
|
||||
if not videos:
|
||||
return None
|
||||
|
||||
log.debug(pprint.pformat(videos))
|
||||
|
||||
if args.manual:
|
||||
log.info(song)
|
||||
log.info('0. Skip downloading this song.\n')
|
||||
# fetch all video links on first page on YouTube
|
||||
for i, v in enumerate(videos):
|
||||
log.info(u'{0}. {1} {2} {3}'.format(i+1, v['title'], v['videotime'],
|
||||
"http://youtube.com/watch?v="+v['link']))
|
||||
# let user select the song to download
|
||||
result = internals.input_link(videos)
|
||||
if not result:
|
||||
return None
|
||||
else:
|
||||
if not meta_tags:
|
||||
# if the metadata could not be acquired, take the first result
|
||||
# from Youtube because the proper song length is unknown
|
||||
result = videos[0]
|
||||
log.debug('Since no metadata found on Spotify, going with the first result')
|
||||
else:
|
||||
# filter out videos that do not have a similar length to the Spotify song
|
||||
duration_tolerance = 10
|
||||
max_duration_tolerance = 20
|
||||
possible_videos_by_duration = list()
|
||||
|
||||
'''
|
||||
start with a reasonable duration_tolerance, and increment duration_tolerance
|
||||
until one of the Youtube results falls within the correct duration or
|
||||
the duration_tolerance has reached the max_duration_tolerance
|
||||
'''
|
||||
while len(possible_videos_by_duration) == 0:
|
||||
possible_videos_by_duration = list(filter(lambda x: abs(x['seconds'] - (int(meta_tags['duration_ms'])/1000)) <= duration_tolerance, videos))
|
||||
duration_tolerance += 1
|
||||
if duration_tolerance > max_duration_tolerance:
|
||||
log.error("{0} by {1} was not found.\n".format(meta_tags['name'],meta_tags['artists'][0]['name']))
|
||||
return None
|
||||
|
||||
result = possible_videos_by_duration[0]
|
||||
|
||||
if result:
|
||||
url = "http://youtube.com/watch?v=" + result['link']
|
||||
else:
|
||||
url = None
|
||||
|
||||
return url
|
||||
|
||||
|
||||
def go_pafy(raw_song, meta_tags=None):
|
||||
""" Parse track from YouTube. """
|
||||
if internals.is_youtube(raw_song):
|
||||
track_info = pafy.new(raw_song)
|
||||
else:
|
||||
track_url = generate_youtube_url(raw_song, meta_tags)
|
||||
|
||||
if track_url:
|
||||
track_info = pafy.new(track_url)
|
||||
else:
|
||||
track_info = None
|
||||
|
||||
return track_info
|
||||
|
||||
|
||||
def get_youtube_title(content, number=None):
|
||||
""" Get the YouTube video's title. """
|
||||
title = content.title
|
||||
if number:
|
||||
return '{0}. {1}'.format(number, title)
|
||||
else:
|
||||
return title
|
||||
|
||||
|
||||
def download_song(file_name, content):
|
||||
""" Download the audio file from YouTube. """
|
||||
if args.input_ext in (".webm", ".m4a"):
|
||||
link = content.getbestaudio(preftype=args.input_ext[1:])
|
||||
else:
|
||||
return False
|
||||
|
||||
if link:
|
||||
log.debug('Downloading from URL: ' + link.url)
|
||||
filepath = '{0}{1}'.format(os.path.join(args.folder, file_name),
|
||||
args.input_ext)
|
||||
log.debug('Saving to: ' + filepath)
|
||||
link.download(filepath=filepath)
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
|
||||
def check_exists(music_file, raw_song, meta_tags):
|
||||
""" Check if the input song already exists in the given folder. """
|
||||
log.debug('Cleaning any temp files and checking '
|
||||
@@ -287,12 +133,12 @@ def grab_single(raw_song, number=None):
|
||||
""" Logic behind downloading a song. """
|
||||
if internals.is_youtube(raw_song):
|
||||
log.debug('Input song is a YouTube URL')
|
||||
content = go_pafy(raw_song, meta_tags=None)
|
||||
content = youtube_tools.go_pafy(raw_song, meta_tags=None)
|
||||
raw_song = slugify(content.title).replace('-', ' ')
|
||||
meta_tags = spotify_tools.generate_metadata(raw_song)
|
||||
else:
|
||||
meta_tags = spotify_tools.generate_metadata(raw_song)
|
||||
content = go_pafy(raw_song, meta_tags)
|
||||
content = youtube_tools.go_pafy(raw_song, meta_tags)
|
||||
|
||||
if not content:
|
||||
log.debug('Found no matching video')
|
||||
@@ -300,13 +146,13 @@ def grab_single(raw_song, number=None):
|
||||
|
||||
# "[number]. [artist] - [song]" if downloading from list
|
||||
# otherwise "[artist] - [song]"
|
||||
youtube_title = get_youtube_title(content, number)
|
||||
youtube_title = youtube_tools.get_youtube_title(content, number)
|
||||
log.info('{} ({})'.format(youtube_title, content.watchv_url))
|
||||
# generate file name of the song to download
|
||||
songname = content.title
|
||||
|
||||
if meta_tags:
|
||||
refined_songname = generate_songname(meta_tags)
|
||||
refined_songname = internals.generate_songname(meta_tags)
|
||||
log.debug('Refining songname from "{0}" to "{1}"'.format(songname, refined_songname))
|
||||
if not refined_songname == ' - ':
|
||||
songname = refined_songname
|
||||
@@ -317,7 +163,7 @@ def grab_single(raw_song, number=None):
|
||||
file_name = internals.sanitize_title(songname)
|
||||
|
||||
if not check_exists(file_name, raw_song, meta_tags):
|
||||
if download_song(file_name, content):
|
||||
if youtube_tools.download_song(file_name, content):
|
||||
input_song = file_name + args.input_ext
|
||||
output_song = file_name + args.output_ext
|
||||
print('')
|
||||
@@ -350,7 +196,7 @@ token = spotify_tools.generate_token()
|
||||
spotify = spotipy.Spotify(auth=token)
|
||||
|
||||
if __name__ == '__main__':
|
||||
args = internals.get_arguments()
|
||||
args = arguments.parsed
|
||||
internals.filter_path(args.folder)
|
||||
|
||||
logger.log = logger.logzero.setup_logger(formatter=logger.formatter,
|
||||
|
||||
@@ -7,6 +7,8 @@ import os
|
||||
raw_song = "Tony's Videos VERY SHORT VIDEO 28.10.2016"
|
||||
|
||||
|
||||
test_args = '-f test -ll debug -m --overwrite skip'.split()
|
||||
|
||||
class TestArgs:
|
||||
manual = False
|
||||
input_ext = '.m4a'
|
||||
@@ -16,8 +18,7 @@ class TestArgs:
|
||||
overwrite = 'skip'
|
||||
music_videos_only = False
|
||||
|
||||
test_args = TestArgs()
|
||||
setattr(spotdl, "args", test_args)
|
||||
setattr(spotdl, "args", TestArgs())
|
||||
|
||||
spotdl.log = logger.logzero.setup_logger(formatter=logger.formatter,
|
||||
level=spotdl.args.log_level)
|
||||
|
||||
Reference in New Issue
Block a user