Mirror of https://github.com/KevinMidboe/spotify-downloader.git
	Changes for v1.0.0 release (#345)
* Move spotdl.py inside spotdl directory
* Fix Dockerfile
* Re-upload FFmpeg binary
* Small fixes ;)
spotdl/youtube_tools.py (new file, 240 lines added)
@@ -0,0 +1,240 @@
from bs4 import BeautifulSoup
import urllib.request
import pafy

from spotdl import internals
from spotdl import const

import os
import pprint

log = const.log

# Fix download speed throttle on short duration tracks
# Read more on mps-youtube/pafy#199
pafy.g.opener.addheaders.append(('Range', 'bytes=0-'))


def set_api_key():
    if const.args.youtube_api_key:
        key = const.args.youtube_api_key
    else:
        # Please respect this YouTube token :)
        key = 'AIzaSyC6cEeKlxtOPybk9sEe5ksFN5sB-7wzYp0'
    pafy.set_api_key(key)


def go_pafy(raw_song, meta_tags=None):
    """ Parse track from YouTube. """
    if internals.is_youtube(raw_song):
        track_info = pafy.new(raw_song)
    else:
        track_url = generate_youtube_url(raw_song, meta_tags)

        if track_url:
            track_info = pafy.new(track_url)
        else:
            track_info = None

    return track_info


def get_youtube_title(content, number=None):
    """ Get the YouTube video's title. """
    title = content.title
    if number:
        return '{0}. {1}'.format(number, title)
    else:
        return title


def download_song(file_name, content):
    """ Download the audio file from YouTube. """
    _, extension = os.path.splitext(file_name)
    if extension in ('.webm', '.m4a'):
        link = content.getbestaudio(preftype=extension[1:])
    else:
        log.debug('No audio streams available for {} type'.format(extension))
        return False

    if link:
        log.debug('Downloading from URL: ' + link.url)
        filepath = os.path.join(const.args.folder, file_name)
        log.debug('Saving to: ' + filepath)
        link.download(filepath=filepath)
        return True
    else:
        log.debug('No audio streams available')
        return False


def generate_search_url(query):
    """ Generate YouTube search URL for the given song. """
    # urllib.request.quote() percent-encodes special characters in the query
    quoted_query = urllib.request.quote(query)
    # Special YouTube URL filter to search only for videos
    url = 'https://www.youtube.com/results?sp=EgIQAQ%253D%253D&q={0}'.format(quoted_query)
    return url


def is_video(result):
    # ensure result is not a channel
    not_video = result.find('channel') is not None or \
                'yt-lockup-channel' in result.parent.attrs['class'] or \
                'yt-lockup-channel' in result.attrs['class']

    # ensure result is not a mix/playlist
    not_video = not_video or \
                'yt-lockup-playlist' in result.parent.attrs['class']

    # ensure video result is not an advertisement
    not_video = not_video or \
                result.find('googleads') is not None

    video = not not_video
    return video


def generate_youtube_url(raw_song, meta_tags):
    url_fetch = GenerateYouTubeURL(raw_song, meta_tags)
    if const.args.youtube_api_key:
        url = url_fetch.api()
    else:
        url = url_fetch.scrape()
    return url


class GenerateYouTubeURL:
    def __init__(self, raw_song, meta_tags):
        self.raw_song = raw_song
        self.meta_tags = meta_tags

        if meta_tags is None:
            self.search_query = raw_song
        else:
            self.search_query = internals.format_string(const.args.search_format,
                                                        meta_tags, force_spaces=True)

    def _best_match(self, videos):
        """ Select the best matching video from a list of videos. """
        if const.args.manual:
            log.info(self.raw_song)
            log.info('0. Skip downloading this song.\n')
            # fetch all video links on first page on YouTube
            for i, v in enumerate(videos):
                log.info(u'{0}. {1} {2} {3}'.format(i+1, v['title'], v['videotime'],
                         "http://youtube.com/watch?v="+v['link']))
            # let user select the song to download
            result = internals.input_link(videos)
            if result is None:
                return None
        else:
            if not self.meta_tags:
                # if the metadata could not be acquired, take the first result
                # from Youtube because the proper song length is unknown
                result = videos[0]
                log.debug('Since no metadata found on Spotify, going with the first result')
            else:
                # filter out videos that do not have a similar length to the Spotify song
                duration_tolerance = 10
                max_duration_tolerance = 20
                possible_videos_by_duration = []

                # start with a reasonable duration_tolerance, and increment duration_tolerance
                # until one of the Youtube results falls within the correct duration or
                # the duration_tolerance has reached the max_duration_tolerance
                while len(possible_videos_by_duration) == 0:
                    possible_videos_by_duration = list(filter(lambda x: abs(x['seconds'] - self.meta_tags['duration']) <= duration_tolerance, videos))
                    duration_tolerance += 1
                    if duration_tolerance > max_duration_tolerance:
                        log.error("{0} by {1} was not found.\n".format(self.meta_tags['name'], self.meta_tags['artists'][0]['name']))
                        return None

                result = possible_videos_by_duration[0]

        if result:
            url = "http://youtube.com/watch?v={0}".format(result['link'])
        else:
            url = None

        return url

    def scrape(self, bestmatch=True, tries_remaining=5):
        """ Search and scrape YouTube to return a list of matching videos. """

        # prevents an infinite loop but allows for a few retries
        if tries_remaining == 0:
            log.debug('No tries left. I quit.')
            return

        search_url = generate_search_url(self.search_query)
        log.debug('Opening URL: {0}'.format(search_url))

        item = urllib.request.urlopen(search_url).read()
        items_parse = BeautifulSoup(item, "html.parser")

        videos = []
        for x in items_parse.find_all('div', {'class': 'yt-lockup-dismissable yt-uix-tile'}):

            if not is_video(x):
                continue

            y = x.find('div', class_='yt-lockup-content')
            link = y.find('a')['href'][-11:]
            title = y.find('a')['title']

            try:
                videotime = x.find('span', class_="video-time").get_text()
            except AttributeError:
                log.debug('Could not find video duration on YouTube, retrying...')
                return self.scrape(bestmatch=bestmatch, tries_remaining=tries_remaining-1)

            youtubedetails = {'link': link, 'title': title, 'videotime': videotime,
                              'seconds': internals.get_sec(videotime)}
            videos.append(youtubedetails)

        if bestmatch:
            return self._best_match(videos)

        return videos


    def api(self, bestmatch=True):
        """ Use YouTube API to search and return a list of matching videos. """

        query = { 'part'       : 'snippet',
                  'maxResults' :  50,
                  'type'       : 'video' }

        if const.args.music_videos_only:
            query['videoCategoryId'] = '10'

        if not self.meta_tags:
            song = self.raw_song
            query['q'] = song
        else:
            query['q'] = self.search_query
        log.debug('query: {0}'.format(query))

        data = pafy.call_gdata('search', query)
        data['items'] = list(filter(lambda x: x['id'].get('videoId') is not None,
                                    data['items']))
        query_results = {'part': 'contentDetails,snippet,statistics',
                         'maxResults': 50,
                         'id': ','.join(i['id']['videoId'] for i in data['items'])}
        log.debug('query_results: {0}'.format(query_results))

        vdata = pafy.call_gdata('videos', query_results)

        videos = []
        for x in vdata['items']:
            duration_s = pafy.playlist.parseISO8591(x['contentDetails']['duration'])
            youtubedetails = {'link': x['id'], 'title': x['snippet']['title'],
                              'videotime': internals.videotime_from_seconds(duration_s),
                              'seconds': duration_s}
            videos.append(youtubedetails)

        if bestmatch:
            return self._best_match(videos)

        return videos
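
Nothing in this commit shows the module being called, so the short sketch below illustrates how its pieces fit together: const.args holds the CLI options every function reads, set_api_key() primes pafy, go_pafy() turns a search string (or YouTube URL) into a pafy object, and download_song() writes the best audio stream to disk. This is a hypothetical, standalone example rather than part of the commit: the argparse.Namespace stand-in for const.args, the literal option values, and the example search string are all assumptions, and the scraping path only works against the YouTube results markup of that era.

# Hypothetical standalone use of spotdl.youtube_tools (not part of this commit).
# spotdl's CLI normally builds const.args via argparse; a bare Namespace with
# only the fields this module reads stands in for it here.
import argparse

from spotdl import const, youtube_tools

const.args = argparse.Namespace(
    youtube_api_key=None,  # set_api_key() then falls back to the bundled key
    manual=False,          # take the first scraped result instead of prompting
    folder='.',            # directory download_song() writes into
)

youtube_tools.set_api_key()

# raw_song may be a plain search string or a YouTube URL (example value assumed)
content = youtube_tools.go_pafy('Artist - Track Title')
if content:
    print(youtube_tools.get_youtube_title(content))
    # a .m4a name routes download_song() through content.getbestaudio(preftype='m4a')
    youtube_tools.download_song('Artist - Track Title.m4a', content)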