Filter unwanted text from Spotify URLs when extracting information (#394)

* Split unwanted URL part

* Convert get_splits() -> extract_spotify_id()

* Add tests for extract_spotify_id()

* Extract Spotify artist ID when fetching artist albums
This commit is contained in:
Ritiek Malhotra
2018-10-26 18:29:29 +05:30
committed by GitHub
parent 94dc27a77b
commit be4bb25c96
3 changed files with 102 additions and 38 deletions

View File

@@ -89,9 +89,7 @@ def format_string(string_format, tags, slugification=False, force_spaces=False):
format_tags[11] = tags["external_ids"]["isrc"] format_tags[11] = tags["external_ids"]["isrc"]
format_tags_sanitized = { format_tags_sanitized = {
k: sanitize_title(str(v), ok="'-_()[]{}") k: sanitize_title(str(v), ok="'-_()[]{}") if slugification else str(v)
if slugification
else str(v)
for k, v in format_tags.items() for k, v in format_tags.items()
} }
@@ -157,14 +155,30 @@ def get_sec(time_str):
return sec return sec
def extract_spotify_id(raw_string):
    """
    Returns a Spotify ID of a playlist, album, etc. after extracting
    it from a given HTTP URL or Spotify URI.

    :param raw_string - an HTTP URL (e.g.
                        "https://open.spotify.com/album/<id>?si=...") or a
                        Spotify URI (e.g. "spotify:album:<id>")
    :param return - the last path/URI component with any query string,
                    fragment and trailing slashes removed
    """
    # We need to manually trim additional text (query string, fragment)
    # from the input. We could skip this if
    # https://github.com/plamere/spotipy/pull/324 gets merged.
    # This is done *before* stripping trailing slashes so that inputs like
    # ".../album/<id>/?si=..." do not end up yielding an empty ID.
    for separator in ("?", "#"):
        raw_string = raw_string.partition(separator)[0]

    if "/" in raw_string:
        # Input string is an HTTP URL
        splits = raw_string.rstrip("/").split("/")
    else:
        # Input string is a Spotify URI
        splits = raw_string.split(":")

    # str.split() always returns at least one element, so this never raises
    spotify_id = splits[-1]
    return spotify_id
def get_unique_tracks(text_file): def get_unique_tracks(text_file):
@@ -183,6 +197,7 @@ def get_unique_tracks(text_file):
return lines return lines
# a hacky way to get user's localized music directory # a hacky way to get user's localized music directory
# (thanks @linusg, issue #203) # (thanks @linusg, issue #203)
def get_music_dir(): def get_music_dir():
@@ -203,9 +218,14 @@ def get_music_dir():
# Windows / Cygwin # Windows / Cygwin
# Queries registry for 'My Music' folder path (as this can be changed) # Queries registry for 'My Music' folder path (as this can be changed)
if 'win' in sys.platform: if "win" in sys.platform:
try: try:
key = winreg.OpenKey(winreg.HKEY_CURRENT_USER, r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders", 0, winreg.KEY_ALL_ACCESS) key = winreg.OpenKey(
winreg.HKEY_CURRENT_USER,
r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders",
0,
winreg.KEY_ALL_ACCESS,
)
return winreg.QueryValueEx(key, "My Music")[0] return winreg.QueryValueEx(key, "My Music")[0]
except (FileNotFoundError, NameError): except (FileNotFoundError, NameError):
pass pass

View File

@@ -122,17 +122,15 @@ def get_playlists(username):
def fetch_playlist(playlist): def fetch_playlist(playlist):
splits = internals.get_splits(playlist)
try: try:
username = splits[-3] playlist_id = internals.extract_spotify_id(playlist)
except IndexError: except IndexError:
# Wrong format, in either case # Wrong format, in either case
log.error("The provided playlist URL is not in a recognized format!") log.error("The provided playlist URL is not in a recognized format!")
sys.exit(10) sys.exit(10)
playlist_id = splits[-1]
try: try:
results = spotify.user_playlist( results = spotify.user_playlist(
username, playlist_id, fields="tracks,next,name" user=None, playlist_id=playlist_id, fields="tracks,next,name"
) )
except spotipy.client.SpotifyException: except spotipy.client.SpotifyException:
log.error("Unable to find playlist") log.error("Unable to find playlist")
@@ -151,12 +149,12 @@ def write_playlist(playlist_url, text_file=None):
def fetch_album(album): def fetch_album(album):
splits = internals.get_splits(album) album_id = internals.extract_spotify_id(album)
album_id = splits[-1]
album = spotify.album(album_id) album = spotify.album(album_id)
return album return album
def fetch_album_from_artist(artist_url, album_type='album'):
def fetch_album_from_artist(artist_url, album_type="album"):
""" """
This function returns all the albums from a given artist_url using the US This function returns all the albums from a given artist_url using the US
market market
@@ -166,16 +164,17 @@ def fetch_album_from_artist(artist_url, album_type='album'):
:param return - the album from the artist :param return - the album from the artist
""" """
#fetching artist's albums limiting the results to the US to avoid duplicate # fetching artist's albums limiting the results to the US to avoid duplicate
#albums from multiple markets # albums from multiple markets
results = spotify.artist_albums(artist_url, album_type=album_type, country='US') artist_id = internals.extract_spotify_id(artist_url)
results = spotify.artist_albums(artist_id, album_type=album_type, country="US")
albums = results['items'] albums = results["items"]
#indexing all pages of results # indexing all pages of results
while results['next']: while results["next"]:
results = spotify.next(results) results = spotify.next(results)
albums.extend(results['items']) albums.extend(results["items"])
return albums return albums
@@ -189,27 +188,28 @@ def write_all_albums_from_artist(artist_url, text_file=None):
:param text_file - file to write albums to :param text_file - file to write albums to
""" """
album_base_url = 'https://open.spotify.com/album/' album_base_url = "https://open.spotify.com/album/"
#fetching all default albums # fetching all default albums
albums = fetch_album_from_artist(artist_url) albums = fetch_album_from_artist(artist_url)
#if no file is given, the default save file is in the current working # if no file is given, the default save file is in the current working
#directory with the name of the artist # directory with the name of the artist
if text_file is None: if text_file is None:
text_file = albums[0]['artists'][0]['name']+'.txt' text_file = albums[0]["artists"][0]["name"] + ".txt"
for album in albums: for album in albums:
#logging album name # logging album name
log.info('Fetching album: ' + album['name']) log.info("Fetching album: " + album["name"])
write_album(album_base_url + album['id'], text_file=text_file) write_album(album_base_url + album["id"], text_file=text_file)
#fetching all single albums # fetching all single albums
singles = fetch_album_from_artist(artist_url, album_type='single') singles = fetch_album_from_artist(artist_url, album_type="single")
for single in singles: for single in singles:
log.info('Fetching single: ' + single['name']) log.info("Fetching single: " + single["name"])
write_album(album_base_url + single['id'], text_file=text_file) write_album(album_base_url + single["id"], text_file=text_file)
def write_album(album_url, text_file=None): def write_album(album_url, text_file=None):
album = fetch_album(album_url) album = fetch_album(album_url)

View File

@@ -41,6 +41,44 @@ DUPLICATE_TRACKS_TEST_TABLE = [
), ),
] ]
STRING_IDS_TEST_TABLE = [
(
"https://open.spotify.com/artist/1feoGrmmD8QmNqtK2Gdwy8?si=_cVm-FBRQmi7VWML7E49Ig",
"1feoGrmmD8QmNqtK2Gdwy8",
),
(
"https://open.spotify.com/artist/1feoGrmmD8QmNqtK2Gdwy8",
"1feoGrmmD8QmNqtK2Gdwy8",
),
("spotify:artist:1feoGrmmD8QmNqtK2Gdwy8", "1feoGrmmD8QmNqtK2Gdwy8"),
(
"https://open.spotify.com/album/1d1l3UkeAjtM7kVTDyR8yp?si=LkVQLJGGT--Lh8BWM4MGvg",
"1d1l3UkeAjtM7kVTDyR8yp",
),
("https://open.spotify.com/album/1d1l3UkeAjtM7kVTDyR8yp", "1d1l3UkeAjtM7kVTDyR8yp"),
("spotify:album:1d1l3UkeAjtM7kVTDyR8yp", "1d1l3UkeAjtM7kVTDyR8yp"),
(
"https://open.spotify.com/user/5kkyy50uu8btnagp30pobxz2f/playlist/3SFKRjUXm0IMQJMkEgPHeY?si=8Da4gbE2T9qMkd8Upg22ZA",
"3SFKRjUXm0IMQJMkEgPHeY",
),
(
"https://open.spotify.com/playlist/3SFKRjUXm0IMQJMkEgPHeY?si=8Da4gbE2T9qMkd8Upg22ZA",
"3SFKRjUXm0IMQJMkEgPHeY",
),
(
"https://open.spotify.com/playlist/3SFKRjUXm0IMQJMkEgPHeY",
"3SFKRjUXm0IMQJMkEgPHeY",
),
(
"spotify:user:5kkyy50uu8btnagp30pobxz2f:playlist:3SFKRjUXm0IMQJMkEgPHeY",
"3SFKRjUXm0IMQJMkEgPHeY",
),
(
"https://open.spotify.com/user/uqlakumu7wslkoen46s5bulq0",
"uqlakumu7wslkoen46s5bulq0",
),
]
def test_default_music_directory(): def test_default_music_directory():
if sys.platform.startswith("linux"): if sys.platform.startswith("linux"):
@@ -126,3 +164,9 @@ def test_get_unique_tracks(tmpdir, duplicates, expected):
unique_tracks = internals.get_unique_tracks(file_path) unique_tracks = internals.get_unique_tracks(file_path)
assert tuple(unique_tracks) == expected assert tuple(unique_tracks) == expected
@pytest.mark.parametrize("input_str, expected_spotify_id", STRING_IDS_TEST_TABLE)
def test_extract_spotify_id(input_str, expected_spotify_id):
spotify_id = internals.extract_spotify_id(input_str)
assert spotify_id == expected_spotify_id