Merge pull request from GHSA-q764-g6fm-555v

* Improve URL and URI handling

* Back to SpotifyException for backward-compatibility

* fix copy paste typo

* TODO v3 comments

Co-authored-by: Stephane Bruckert <stephane.bruckert@gmail.com>
This commit is contained in:
Shaderbug 2023-01-23 19:50:07 +01:00 committed by GitHub
parent 262e7a0443
commit b1db0b63d9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 53 additions and 17 deletions

View File

@ -8,8 +8,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## Unreleased ## Unreleased
// Add new changes below this line // Add new changes below this line
- Modified docstring for playlist_add_items() to accept "only URIs or URLs",
with intended deprecation for IDs in v3
### Added ### Added
@ -17,8 +15,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added Comment to README - Getting Started for user to add URI to app in Spotify Developer Dashboard. - Added Comment to README - Getting Started for user to add URI to app in Spotify Developer Dashboard.
- Added playlist_add_tracks.py to example folder - Added playlist_add_tracks.py to example folder
### Changed
- Modified docstring for playlist_add_items() to accept "only URIs or URLs",
with intended deprecation for IDs in v3
### Fixed ### Fixed
- Path traversal vulnerability that may lead to type confusion in URI handling code
- Update contributing.md - Update contributing.md
### Removed ### Removed

View File

@ -6,6 +6,7 @@ __all__ = ["Spotify", "SpotifyException"]
import json import json
import logging import logging
import re
import warnings import warnings
import requests import requests
@ -96,6 +97,29 @@ class Spotify(object):
"US", "US",
"UY"] "UY"]
# Spotify URI scheme defined in [1], and the ID format as base-62 in [2].
#
# Unfortunately the IANA specification is out of date and doesn't include the new types
# show and episode. Additionally, for the user URI, it does not specify which characters
# are valid for usernames, so the assumption is alphanumeric which coincidentially are also
# the same ones base-62 uses.
# In limited manual exploration this seems to hold true, as newly accounts are assigned an
# identifier that looks like the base-62 of all other IDs, but some older accounts only have
# numbers and even older ones seemed to have been allowed to freely pick this name.
#
# [1] https://www.iana.org/assignments/uri-schemes/prov/spotify
# [2] https://developer.spotify.com/documentation/web-api/#spotify-uris-and-ids
_regex_spotify_uri = r'^spotify:(?P<type>track|artist|album|playlist|show|episode|user):(?P<id>[0-9A-Za-z]+)$'
# Spotify URLs are defined at [1]. The assumption is made that they are all
# pointing to open.spotify.com, so a regex is used to parse them as well,
# instead of a more complex URL parsing function.
#
# [1] https://developer.spotify.com/documentation/web-api/#spotify-uris-and-ids
_regex_spotify_url = r'^(http[s]?:\/\/)?open.spotify.com\/(?P<type>track|artist|album|playlist|show|episode|user)\/(?P<id>[0-9A-Za-z]+)(\?.*)?$'
_regex_base62 = r'^[0-9A-Za-z]+$'
def __init__( def __init__(
self, self,
auth=None, auth=None,
@ -1940,21 +1964,29 @@ class Spotify(object):
return path return path
def _get_id(self, type, id): def _get_id(self, type, id):
fields = id.split(":") uri_match = re.search(Spotify._regex_spotify_uri, id)
if len(fields) >= 3: if uri_match is not None:
if type != fields[-2]: uri_match_groups = uri_match.groupdict()
logger.warning('Expected id of type %s but found type %s %s', if uri_match_groups['type'] != type:
type, fields[-2], id) # TODO change to a ValueError in v3
return fields[-1].split("?")[0] raise SpotifyException(400, -1, "Unexpected Spotify URI type.")
fields = id.split("/") return uri_match_groups['id']
if len(fields) >= 3:
itype = fields[-2] url_match = re.search(Spotify._regex_spotify_url, id)
if type != itype: if url_match is not None:
logger.warning('Expected id of type %s but found type %s %s', url_match_groups = url_match.groupdict()
type, itype, id) if url_match_groups['type'] != type:
return fields[-1].split("?")[0] raise SpotifyException(400, -1, "Unexpected Spotify URL type.")
# TODO change to a ValueError in v3
return url_match_groups['id']
# Raw identifiers might be passed, ensure they are also base-62
if re.search(Spotify._regex_base62, id) is not None:
return id return id
# TODO change to a ValueError in v3
raise SpotifyException(400, -1, "Unsupported URL / URI.")
def _get_uri(self, type, id): def _get_uri(self, type, id):
if self._is_uri(id): if self._is_uri(id):
return id return id
@ -1962,7 +1994,7 @@ class Spotify(object):
return "spotify:" + type + ":" + self._get_id(type, id) return "spotify:" + type + ":" + self._get_id(type, id)
def _is_uri(self, uri): def _is_uri(self, uri):
return uri.startswith("spotify:") and len(uri.split(':')) == 3 return re.search(Spotify._regex_spotify_uri, uri) is not None
def _search_multiple_markets(self, q, limit, offset, type, markets, total): def _search_multiple_markets(self, q, limit, offset, type, markets, total):
if total and limit > total: if total and limit > total: