Skip to content

Commit c51da3f

Browse files
committed
plugins: restructure id extraction
1 parent f0afda7 commit c51da3f

11 files changed

+103
-269
lines changed

beets/plugins.py

+4-32
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737

3838
import beets
3939
from beets import logging
40+
from beets.util.id_extractors import extract_release_id
4041

4142
if sys.version_info >= (3, 10):
4243
from typing import ParamSpec
@@ -768,15 +769,6 @@ class Response(TypedDict):
768769
id: str
769770

770771

771-
class RegexDict(TypedDict):
772-
"""A dictionary containing a regex pattern and the number of the
773-
match group.
774-
"""
775-
776-
pattern: str
777-
match_group: int
778-
779-
780772
R = TypeVar("R", bound=Response)
781773

782774

@@ -785,11 +777,6 @@ def __init__(self):
785777
super().__init__()
786778
self.config.add({"source_weight": 0.5})
787779

788-
@property
789-
@abc.abstractmethod
790-
def id_regex(self) -> RegexDict:
791-
raise NotImplementedError
792-
793780
@property
794781
@abc.abstractmethod
795782
def data_source(self) -> str:
@@ -879,24 +866,9 @@ def get_artist(
879866

880867
return artist_string, artist_id
881868

882-
@staticmethod
883-
def _get_id(url_type: str, id_: str, id_regex: RegexDict) -> str | None:
884-
"""Parse an ID from its URL if necessary.
885-
886-
:param url_type: Type of URL. Either 'album' or 'track'.
887-
:param id_: Album/track ID or URL.
888-
:param id_regex: A dictionary containing a regular expression
889-
extracting an ID from a URL (if it's not an ID already) in
890-
'pattern' and the number of the match group in 'match_group'.
891-
:return: Album/track ID.
892-
"""
893-
log.debug("Extracting {} ID from '{}'", url_type, id_)
894-
match = re.search(id_regex["pattern"].format(url_type), str(id_))
895-
if match:
896-
id_ = match.group(id_regex["match_group"])
897-
if id_:
898-
return id_
899-
return None
869+
def _get_id(self, id_string: str) -> str | None:
870+
"""Parse release ID from the given ID string."""
871+
return extract_release_id(self.data_source.lower(), id_string)
900872

901873
def candidates(
902874
self,

beets/util/id_extractors.py

+20-37
Original file line numberDiff line numberDiff line change
@@ -14,52 +14,35 @@
1414

1515
"""Helpers around the extraction of album/track IDs from metadata sources."""
1616

17-
import re
18-
19-
# Spotify IDs consist of 22 alphanumeric characters
20-
# (zero-left-padded base62 representation of randomly generated UUID4)
21-
spotify_id_regex = {
22-
"pattern": r"(^|open\.spotify\.com/{}/)([0-9A-Za-z]{{22}})",
23-
"match_group": 2,
24-
}
25-
26-
deezer_id_regex = {
27-
"pattern": r"(^|deezer\.com/)([a-z]*/)?({}/)?(\d+)",
28-
"match_group": 4,
29-
}
30-
31-
beatport_id_regex = {
32-
"pattern": r"(^|beatport\.com/release/.+/)(\d+)$",
33-
"match_group": 2,
34-
}
35-
36-
# A note on Bandcamp: There is no such thing as a Bandcamp album or artist ID,
37-
# the URL can be used as the identifier. The Bandcamp metadata source plugin
38-
# works that way - https://github.com/snejus/beetcamp. Bandcamp album
39-
# URLs usually look like: https://nameofartist.bandcamp.com/album/nameofalbum
17+
from __future__ import annotations
4018

19+
import re
4120

42-
def extract_discogs_id_regex(album_id):
43-
"""Returns the Discogs_id or None."""
44-
# Discogs-IDs are simple integers. In order to avoid confusion with
45-
# other metadata plugins, we only look for very specific formats of the
46-
# input string:
21+
PATTERN_BY_SOURCE = {
22+
"spotify": re.compile(r"(?:^|open\.spotify\.com/[^/]+/)([0-9A-Za-z]{22})"),
23+
"deezer": re.compile(r"(?:^|deezer\.com/)(?:[a-z]*/)?(?:[^/]+/)?(\d+)"),
24+
"beatport": re.compile(r"(?:^|beatport\.com/release/.+/)(\d+)$"),
25+
"musicbrainz": re.compile(r"(\w{8}(?:-\w{4}){3}-\w{12})"),
4726
# - plain integer, optionally wrapped in brackets and prefixed by an
4827
# 'r', as this is how discogs displays the release ID on its webpage.
4928
# - legacy url format: discogs.com/<name of release>/release/<id>
5029
# - legacy url short format: discogs.com/release/<id>
5130
# - current url format: discogs.com/release/<id>-<name of release>
5231
# See #291, #4080 and #4085 for the discussions leading up to these
5332
# patterns.
54-
# Regex has been tested here https://regex101.com/r/TOu7kw/1
33+
"discogs": re.compile(
34+
r"(?:^|\[?r|discogs\.com/(?:[^/]+/)?release/)(\d+)\b"
35+
),
36+
# There is no such thing as a Bandcamp album or artist ID, the URL can be
37+
# used as the identifier. The Bandcamp metadata source plugin works that way
38+
# - https://github.com/snejus/beetcamp. Bandcamp album URLs usually look
39+
# like: https://nameofartist.bandcamp.com/album/nameofalbum
40+
"bandcamp": re.compile(r"(.+)"),
41+
"tidal": re.compile(r"([^/]+)$"),
42+
}
5543

56-
for pattern in [
57-
r"^\[?r?(?P<id>\d+)\]?$",
58-
r"discogs\.com/release/(?P<id>\d+)-?",
59-
r"discogs\.com/[^/]+/release/(?P<id>\d+)",
60-
]:
61-
match = re.search(pattern, album_id)
62-
if match:
63-
return int(match.group("id"))
6444

45+
def extract_release_id(source: str, id_: str) -> str | None:
46+
if m := PATTERN_BY_SOURCE[source].search(str(id_)):
47+
return m[1]
6548
return None

beetsplug/beatport.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
import beets.ui
3131
from beets.autotag.hooks import AlbumInfo, TrackInfo
3232
from beets.plugins import BeetsPlugin, MetadataSourcePlugin, get_distance
33-
from beets.util.id_extractors import beatport_id_regex
3433

3534
AUTH_ERRORS = (TokenRequestDenied, TokenMissing, VerifierMissing)
3635
USER_AGENT = f"beets/{beets.__version__} +https://beets.io/"
@@ -282,7 +281,6 @@ def __init__(self, data):
282281

283282
class BeatportPlugin(BeetsPlugin):
284283
data_source = "Beatport"
285-
id_regex = beatport_id_regex
286284

287285
def __init__(self):
288286
super().__init__()
@@ -394,8 +392,7 @@ def album_for_id(self, release_id):
394392
"""
395393
self._log.debug("Searching for release {0}", release_id)
396394

397-
release_id = self._get_id("album", release_id, self.id_regex)
398-
if release_id is None:
395+
if not (release_id := self._get_id(release_id)):
399396
self._log.debug("Not a valid Beatport release ID.")
400397
return None
401398

beetsplug/deezer.py

+19-25
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414

1515
"""Adds Deezer release and track search support to the autotagger"""
1616

17+
from __future__ import annotations
18+
1719
import collections
1820
import time
1921

@@ -25,7 +27,6 @@
2527
from beets.dbcore import types
2628
from beets.library import DateType
2729
from beets.plugins import BeetsPlugin, MetadataSourcePlugin
28-
from beets.util.id_extractors import deezer_id_regex
2930

3031

3132
class DeezerPlugin(MetadataSourcePlugin, BeetsPlugin):
@@ -43,8 +44,6 @@ class DeezerPlugin(MetadataSourcePlugin, BeetsPlugin):
4344
album_url = "https://api.deezer.com/album/"
4445
track_url = "https://api.deezer.com/track/"
4546

46-
id_regex = deezer_id_regex
47-
4847
def __init__(self):
4948
super().__init__()
5049

@@ -75,21 +74,15 @@ def fetch_data(self, url):
7574
return None
7675
return data
7776

78-
def album_for_id(self, album_id):
79-
"""Fetch an album by its Deezer ID or URL and return an
80-
AlbumInfo object or None if the album is not found.
81-
82-
:param album_id: Deezer ID or URL for the album.
83-
:type album_id: str
84-
:return: AlbumInfo object for album.
85-
:rtype: beets.autotag.hooks.AlbumInfo or None
86-
"""
87-
deezer_id = self._get_id("album", album_id, self.id_regex)
88-
if deezer_id is None:
77+
def album_for_id(self, album_id: str) -> AlbumInfo | None:
78+
"""Fetch an album by its Deezer ID or URL."""
79+
if not (deezer_id := self._get_id(album_id)):
8980
return None
90-
album_data = self.fetch_data(self.album_url + deezer_id)
91-
if album_data is None:
81+
82+
album_url = f"{self.album_url}{deezer_id}"
83+
if not (album_data := self.fetch_data(album_url)):
9284
return None
85+
9386
contributors = album_data.get("contributors")
9487
if contributors is not None:
9588
artist, artist_id = self.get_artist(contributors)
@@ -132,7 +125,7 @@ def album_for_id(self, album_id):
132125
tracks_data.extend(tracks_obj["data"])
133126

134127
tracks = []
135-
medium_totals = collections.defaultdict(int)
128+
medium_totals: dict[int | None, int] = collections.defaultdict(int)
136129
for i, track_data in enumerate(tracks_data, start=1):
137130
track = self._get_track(track_data)
138131
track.index = i
@@ -150,13 +143,15 @@ def album_for_id(self, album_id):
150143
artist_id=artist_id,
151144
tracks=tracks,
152145
albumtype=album_data["record_type"],
153-
va=len(album_data["contributors"]) == 1
154-
and artist.lower() == "various artists",
146+
va=(
147+
len(album_data["contributors"]) == 1
148+
and (artist or "").lower() == "various artists"
149+
),
155150
year=year,
156151
month=month,
157152
day=day,
158153
label=album_data["label"],
159-
mediums=max(medium_totals.keys()),
154+
mediums=max(filter(None, medium_totals.keys())),
160155
data_source=self.data_source,
161156
data_url=album_data["link"],
162157
cover_art_url=album_data.get("cover_xl"),
@@ -204,12 +199,11 @@ def track_for_id(self, track_id=None, track_data=None):
204199
:rtype: beets.autotag.hooks.TrackInfo or None
205200
"""
206201
if track_data is None:
207-
deezer_id = self._get_id("track", track_id, self.id_regex)
208-
if deezer_id is None:
209-
return None
210-
track_data = self.fetch_data(self.track_url + deezer_id)
211-
if track_data is None:
202+
if not (deezer_id := self._get_id(track_id)) or not (
203+
track_data := self.fetch_data(f"{self.track_url}{deezer_id}")
204+
):
212205
return None
206+
213207
track = self._get_track(track_data)
214208

215209
# Get album's tracks to set `track.index` (position on the entire

beetsplug/discogs.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
from beets import config
3939
from beets.autotag.hooks import AlbumInfo, TrackInfo, string_dist
4040
from beets.plugins import BeetsPlugin, MetadataSourcePlugin, get_distance
41-
from beets.util.id_extractors import extract_discogs_id_regex
41+
from beets.util.id_extractors import extract_release_id
4242

4343
USER_AGENT = f"beets/{beets.__version__} +https://beets.io/"
4444
API_KEY = "rAzVUQYRaoFjeBjyWuWZ"
@@ -266,7 +266,7 @@ def album_for_id(self, album_id):
266266
"""
267267
self._log.debug("Searching for release {0}", album_id)
268268

269-
discogs_id = extract_discogs_id_regex(album_id)
269+
discogs_id = extract_release_id("discogs", album_id)
270270

271271
if not discogs_id:
272272
return None
@@ -401,7 +401,7 @@ def get_album_info(self, result):
401401
else:
402402
genre = base_genre
403403

404-
discogs_albumid = extract_discogs_id_regex(result.data.get("uri"))
404+
discogs_albumid = extract_release_id("discogs", result.data.get("uri"))
405405

406406
# Extract information for the optional AlbumInfo fields that are
407407
# contained on nested discogs fields.

beetsplug/musicbrainz.py

+9-40
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
from __future__ import annotations
1818

19-
import re
2019
import traceback
2120
from collections import Counter
2221
from itertools import product
@@ -28,13 +27,8 @@
2827
import beets
2928
import beets.autotag.hooks
3029
from beets import config, plugins, util
31-
from beets.plugins import BeetsPlugin, MetadataSourcePlugin
32-
from beets.util.id_extractors import (
33-
beatport_id_regex,
34-
deezer_id_regex,
35-
extract_discogs_id_regex,
36-
spotify_id_regex,
37-
)
30+
from beets.plugins import BeetsPlugin
31+
from beets.util.id_extractors import extract_release_id
3832

3933
if TYPE_CHECKING:
4034
from collections.abc import Iterator
@@ -300,17 +294,6 @@ def _set_date_str(
300294
setattr(info, key, date_num)
301295

302296

303-
def _parse_id(s: str) -> str | None:
304-
"""Search for a MusicBrainz ID in the given string and return it. If
305-
no ID can be found, return None.
306-
"""
307-
# Find the first thing that looks like a UUID/MBID.
308-
match = re.search("[a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12}", s)
309-
if match is not None:
310-
return match.group() if match else None
311-
return None
312-
313-
314297
def _is_translation(r):
315298
_trans_key = "transl-tracklisting"
316299
return r["type"] == _trans_key and r["direction"] == "backward"
@@ -750,24 +733,10 @@ def album_info(self, release: JSONDict) -> beets.autotag.hooks.AlbumInfo:
750733
source.capitalize(),
751734
)
752735

753-
if "discogs" in urls:
754-
info.discogs_albumid = extract_discogs_id_regex(urls["discogs"])
755-
if "bandcamp" in urls:
756-
info.bandcamp_album_id = urls["bandcamp"]
757-
if "spotify" in urls:
758-
info.spotify_album_id = MetadataSourcePlugin._get_id(
759-
"album", urls["spotify"], spotify_id_regex
736+
for source, url in urls.items():
737+
setattr(
738+
info, f"{source}_album_id", extract_release_id(source, url)
760739
)
761-
if "deezer" in urls:
762-
info.deezer_album_id = MetadataSourcePlugin._get_id(
763-
"album", urls["deezer"], deezer_id_regex
764-
)
765-
if "beatport" in urls:
766-
info.beatport_album_id = MetadataSourcePlugin._get_id(
767-
"album", urls["beatport"], beatport_id_regex
768-
)
769-
if "tidal" in urls:
770-
info.tidal_album_id = urls["tidal"].split("/")[-1]
771740

772741
extra_albumdatas = plugins.send("mb_album_extract", data=release)
773742
for extra_albumdata in extra_albumdatas:
@@ -866,10 +835,10 @@ def album_for_id(
866835
MusicBrainzAPIError.
867836
"""
868837
self._log.debug("Requesting MusicBrainz release {}", album_id)
869-
albumid = _parse_id(album_id)
870-
if not albumid:
838+
if not (albumid := extract_release_id("musicbrainz", album_id)):
871839
self._log.debug("Invalid MBID ({0}).", album_id)
872840
return None
841+
873842
try:
874843
res = musicbrainzngs.get_release_by_id(albumid, RELEASE_INCLUDES)
875844

@@ -903,10 +872,10 @@ def track_for_id(
903872
"""Fetches a track by its MusicBrainz ID. Returns a TrackInfo object
904873
or None if no track is found. May raise a MusicBrainzAPIError.
905874
"""
906-
trackid = _parse_id(track_id)
907-
if not trackid:
875+
if not (trackid := extract_release_id("musicbrainz", track_id)):
908876
self._log.debug("Invalid MBID ({0}).", track_id)
909877
return None
878+
910879
try:
911880
res = musicbrainzngs.get_recording_by_id(trackid, TRACK_INCLUDES)
912881
except musicbrainzngs.ResponseError:

0 commit comments

Comments
 (0)