Skip to content

Commit ad0a784

Browse files
committed
plugins: restructure id extraction
1 parent cab0246 commit ad0a784

11 files changed

+103
-272
lines changed

beets/plugins.py

+4-24
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737

3838
import beets
3939
from beets import logging
40+
from beets.util.id_extractors import extract_release_id
4041

4142
if sys.version_info >= (3, 10):
4243
from typing import ParamSpec
@@ -56,7 +57,6 @@
5657
from beets.importer import ImportSession, ImportTask
5758
from beets.library import Album, Item, Library
5859
from beets.ui import Subcommand
59-
from beets.util.id_extractors import RegexDict
6060

6161
# TYPE_CHECKING guard is needed for any derived type
6262
# which uses an import from `beets.library` and `beets.importer`
@@ -778,11 +778,6 @@ def __init__(self):
778778
super().__init__()
779779
self.config.add({"source_weight": 0.5})
780780

781-
@property
782-
@abc.abstractmethod
783-
def id_regex(self) -> RegexDict:
784-
raise NotImplementedError
785-
786781
@property
787782
@abc.abstractmethod
788783
def data_source(self) -> str:
@@ -872,24 +867,9 @@ def get_artist(
872867

873868
return artist_string, artist_id
874869

875-
@staticmethod
876-
def _get_id(url_type: str, id_: str, id_regex: RegexDict) -> str | None:
877-
"""Parse an ID from its URL if necessary.
878-
879-
:param url_type: Type of URL. Either 'album' or 'track'.
880-
:param id_: Album/track ID or URL.
881-
:param id_regex: A dictionary containing a regular expression
882-
extracting an ID from an URL (if it's not an ID already) in
883-
'pattern' and the number of the match group in 'match_group'.
884-
:return: Album/track ID.
885-
"""
886-
log.debug("Extracting {} ID from '{}'", url_type, id_)
887-
match = re.search(id_regex["pattern"].format(url_type), str(id_))
888-
if match:
889-
id_ = match.group(id_regex["match_group"])
890-
if id_:
891-
return id_
892-
return None
870+
def _get_id(self, id_string: str) -> str | None:
    """Extract this data source's ID from an ID string or URL.

    The lower-cased ``data_source`` name selects the source and the actual
    parsing is delegated to ``extract_release_id``. Despite that helper's
    name, plugins call this method for track IDs as well as album IDs.

    :param id_string: Bare ID or URL that may contain the ID.
    :return: Whatever ``extract_release_id`` yields for this source.
    """
    source = self.data_source.lower()
    return extract_release_id(source, id_string)
893873

894874
def candidates(
895875
self,

beets/util/id_extractors.py

+20-48
Original file line numberDiff line numberDiff line change
@@ -14,63 +14,35 @@
1414

1515
"""Helpers around the extraction of album/track ID's from metadata sources."""
1616

17-
import re
18-
from typing import TypedDict
19-
20-
21-
class RegexDict(TypedDict):
22-
"""A dictionary containing a regex pattern and the number of the
23-
match group.
24-
"""
25-
26-
pattern: str
27-
match_group: int
28-
29-
30-
# Spotify IDs consist of 22 alphanumeric characters
31-
# (zero-left-padded base62 representation of randomly generated UUID4)
32-
spotify_id_regex: RegexDict = {
33-
"pattern": r"(^|open\.spotify\.com/{}/)([0-9A-Za-z]{{22}})",
34-
"match_group": 2,
35-
}
36-
37-
deezer_id_regex: RegexDict = {
38-
"pattern": r"(^|deezer\.com/)([a-z]*/)?({}/)?(\d+)",
39-
"match_group": 4,
40-
}
41-
42-
beatport_id_regex: RegexDict = {
43-
"pattern": r"(^|beatport\.com/release/.+/)(\d+)$",
44-
"match_group": 2,
45-
}
46-
47-
# A note on Bandcamp: There is no such thing as a Bandcamp album or artist ID,
48-
# the URL can be used as the identifier. The Bandcamp metadata source plugin
49-
# works that way - https://github.com/snejus/beetcamp. Bandcamp album
50-
# URLs usually look like: https://nameofartist.bandcamp.com/album/nameofalbum
17+
from __future__ import annotations
5118

19+
import re
5220

53-
def extract_discogs_id_regex(album_id):
54-
"""Returns the Discogs_id or None."""
55-
# Discogs-IDs are simple integers. In order to avoid confusion with
56-
# other metadata plugins, we only look for very specific formats of the
57-
# input string:
21+
PATTERN_BY_SOURCE = {
    # Compiled patterns keyed by lower-case metadata source name. Every
    # pattern exposes the ID as capture group 1 and is meant to be applied
    # with ``.search()``, so it accepts a bare ID as well as a URL.
    #
    # Spotify IDs consist of 22 alphanumeric characters.
    "spotify": re.compile(r"(?:^|open\.spotify\.com/[^/]+/)([0-9A-Za-z]{22})"),
    "deezer": re.compile(r"(?:^|deezer\.com/)(?:[a-z]*/)?(?:[^/]+/)?(\d+)"),
    "beatport": re.compile(r"(?:^|beatport\.com/release/.+/)(\d+)$"),
    # MusicBrainz IDs are UUIDs, so only hexadecimal digits are accepted.
    # A looser ``\w`` would also match hyphenated URL slugs whose words
    # happen to have the 8-4-4-4-12 shape.
    "musicbrainz": re.compile(
        r"([0-9A-Fa-f]{8}(?:-[0-9A-Fa-f]{4}){3}-[0-9A-Fa-f]{12})"
    ),
    # Discogs IDs are simple integers. In order to avoid confusion with
    # other metadata plugins, we only look for very specific formats of the
    # input string:
    # - plain integer, optionally wrapped in brackets and prefixed by an
    #   'r', as this is how discogs displays the release ID on its webpage.
    # - legacy url format: discogs.com/<name of release>/release/<id>
    # - legacy url short format: discogs.com/release/<id>
    # - current url format: discogs.com/release/<id>-<name of release>
    # See #291, #4080 and #4085 for the discussions leading up to these
    # patterns.
    # The lookahead makes the bare form match only when the whole string is
    # the ID, so strings that merely start with digits are rejected.
    "discogs": re.compile(
        r"(?:^\[?r?(?=\d+\]?$)|discogs\.com/(?:[^/]+/)?release/)(\d+)\b"
    ),
    # There is no such thing as a Bandcamp album or artist ID, the URL can be
    # used as the identifier. The Bandcamp metadata source plugin works that
    # way - https://github.com/snejus/beetcamp. Bandcamp album URLs usually
    # look like: https://nameofartist.bandcamp.com/album/nameofalbum
    "bandcamp": re.compile(r"(.+)"),
    # The Tidal ID is taken to be the last path segment of the URL.
    "tidal": re.compile(r"([^/]+)$"),
}
6643

67-
for pattern in [
68-
r"^\[?r?(?P<id>\d+)\]?$",
69-
r"discogs\.com/release/(?P<id>\d+)-?",
70-
r"discogs\.com/[^/]+/release/(?P<id>\d+)",
71-
]:
72-
match = re.search(pattern, album_id)
73-
if match:
74-
return int(match.group("id"))
7544

45+
def extract_release_id(source: str, id_: str) -> str | None:
    """Extract the ID used by ``source`` from a bare ID or a URL.

    :param source: Name of the metadata source. It is lower-cased before
        being looked up in ``PATTERN_BY_SOURCE``.
    :param id_: Bare ID or URL. It is converted with ``str()`` before the
        pattern is applied.
    :return: Capture group 1 of the source's pattern; ``None`` when ``id_``
        is ``None`` or does not match; ``id_`` as a string, unchanged, when
        no pattern is registered for ``source``.
    """
    # Without this guard ``str(None)`` would turn into the text "None", which
    # permissive patterns would then return as if it were an ID.
    if id_ is None:
        return None

    pattern = PATTERN_BY_SOURCE.get(source.lower())
    if pattern is None:
        # No pattern is registered for this source, so there is nothing to
        # validate against: hand the value back instead of raising KeyError.
        return str(id_)

    match = pattern.search(str(id_))
    return match[1] if match else None

beetsplug/beatport.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
import beets.ui
3131
from beets.autotag.hooks import AlbumInfo, TrackInfo
3232
from beets.plugins import BeetsPlugin, MetadataSourcePlugin, get_distance
33-
from beets.util.id_extractors import beatport_id_regex
3433

3534
AUTH_ERRORS = (TokenRequestDenied, TokenMissing, VerifierMissing)
3635
USER_AGENT = f"beets/{beets.__version__} +https://beets.io/"
@@ -282,7 +281,6 @@ def __init__(self, data):
282281

283282
class BeatportPlugin(BeetsPlugin):
284283
data_source = "Beatport"
285-
id_regex = beatport_id_regex
286284

287285
def __init__(self):
288286
super().__init__()
@@ -394,8 +392,7 @@ def album_for_id(self, release_id):
394392
"""
395393
self._log.debug("Searching for release {0}", release_id)
396394

397-
release_id = self._get_id("album", release_id, self.id_regex)
398-
if release_id is None:
395+
if not (release_id := self._get_id(release_id)):
399396
self._log.debug("Not a valid Beatport release ID.")
400397
return None
401398

beetsplug/deezer.py

+19-25
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414

1515
"""Adds Deezer release and track search support to the autotagger"""
1616

17+
from __future__ import annotations
18+
1719
import collections
1820
import time
1921

@@ -25,7 +27,6 @@
2527
from beets.dbcore import types
2628
from beets.library import DateType
2729
from beets.plugins import BeetsPlugin, MetadataSourcePlugin
28-
from beets.util.id_extractors import deezer_id_regex
2930

3031

3132
class DeezerPlugin(MetadataSourcePlugin, BeetsPlugin):
@@ -43,8 +44,6 @@ class DeezerPlugin(MetadataSourcePlugin, BeetsPlugin):
4344
album_url = "https://api.deezer.com/album/"
4445
track_url = "https://api.deezer.com/track/"
4546

46-
id_regex = deezer_id_regex
47-
4847
def __init__(self):
4948
super().__init__()
5049

@@ -75,21 +74,15 @@ def fetch_data(self, url):
7574
return None
7675
return data
7776

78-
def album_for_id(self, album_id):
79-
"""Fetch an album by its Deezer ID or URL and return an
80-
AlbumInfo object or None if the album is not found.
81-
82-
:param album_id: Deezer ID or URL for the album.
83-
:type album_id: str
84-
:return: AlbumInfo object for album.
85-
:rtype: beets.autotag.hooks.AlbumInfo or None
86-
"""
87-
deezer_id = self._get_id("album", album_id, self.id_regex)
88-
if deezer_id is None:
77+
def album_for_id(self, album_id: str) -> AlbumInfo | None:
78+
"""Fetch an album by its Deezer ID or URL."""
79+
if not (deezer_id := self._get_id(album_id)):
8980
return None
90-
album_data = self.fetch_data(self.album_url + deezer_id)
91-
if album_data is None:
81+
82+
album_url = f"{self.album_url}{deezer_id}"
83+
if not (album_data := self.fetch_data(album_url)):
9284
return None
85+
9386
contributors = album_data.get("contributors")
9487
if contributors is not None:
9588
artist, artist_id = self.get_artist(contributors)
@@ -132,7 +125,7 @@ def album_for_id(self, album_id):
132125
tracks_data.extend(tracks_obj["data"])
133126

134127
tracks = []
135-
medium_totals = collections.defaultdict(int)
128+
medium_totals: dict[int | None, int] = collections.defaultdict(int)
136129
for i, track_data in enumerate(tracks_data, start=1):
137130
track = self._get_track(track_data)
138131
track.index = i
@@ -150,13 +143,15 @@ def album_for_id(self, album_id):
150143
artist_id=artist_id,
151144
tracks=tracks,
152145
albumtype=album_data["record_type"],
153-
va=len(album_data["contributors"]) == 1
154-
and artist.lower() == "various artists",
146+
va=(
147+
len(album_data["contributors"]) == 1
148+
and (artist or "").lower() == "various artists"
149+
),
155150
year=year,
156151
month=month,
157152
day=day,
158153
label=album_data["label"],
159-
mediums=max(medium_totals.keys()),
154+
mediums=max(filter(None, medium_totals.keys())),
160155
data_source=self.data_source,
161156
data_url=album_data["link"],
162157
cover_art_url=album_data.get("cover_xl"),
@@ -204,12 +199,11 @@ def track_for_id(self, track_id=None, track_data=None):
204199
:rtype: beets.autotag.hooks.TrackInfo or None
205200
"""
206201
if track_data is None:
207-
deezer_id = self._get_id("track", track_id, self.id_regex)
208-
if deezer_id is None:
209-
return None
210-
track_data = self.fetch_data(self.track_url + deezer_id)
211-
if track_data is None:
202+
if not (deezer_id := self._get_id(track_id)) or not (
203+
track_data := self.fetch_data(f"{self.track_url}{deezer_id}")
204+
):
212205
return None
206+
213207
track = self._get_track(track_data)
214208

215209
# Get album's tracks to set `track.index` (position on the entire

beetsplug/discogs.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
from beets import config
3939
from beets.autotag.hooks import AlbumInfo, TrackInfo, string_dist
4040
from beets.plugins import BeetsPlugin, MetadataSourcePlugin, get_distance
41-
from beets.util.id_extractors import extract_discogs_id_regex
41+
from beets.util.id_extractors import extract_release_id
4242

4343
USER_AGENT = f"beets/{beets.__version__} +https://beets.io/"
4444
API_KEY = "rAzVUQYRaoFjeBjyWuWZ"
@@ -266,7 +266,7 @@ def album_for_id(self, album_id):
266266
"""
267267
self._log.debug("Searching for release {0}", album_id)
268268

269-
discogs_id = extract_discogs_id_regex(album_id)
269+
discogs_id = extract_release_id("discogs", album_id)
270270

271271
if not discogs_id:
272272
return None
@@ -401,7 +401,7 @@ def get_album_info(self, result):
401401
else:
402402
genre = base_genre
403403

404-
discogs_albumid = extract_discogs_id_regex(result.data.get("uri"))
404+
discogs_albumid = extract_release_id("discogs", result.data.get("uri"))
405405

406406
# Extract information for the optional AlbumInfo fields that are
407407
# contained on nested discogs fields.

beetsplug/musicbrainz.py

+9-40
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
from __future__ import annotations
1818

19-
import re
2019
import traceback
2120
from collections import Counter
2221
from itertools import product
@@ -28,13 +27,8 @@
2827
import beets
2928
import beets.autotag.hooks
3029
from beets import config, plugins, util
31-
from beets.plugins import BeetsPlugin, MetadataSourcePlugin
32-
from beets.util.id_extractors import (
33-
beatport_id_regex,
34-
deezer_id_regex,
35-
extract_discogs_id_regex,
36-
spotify_id_regex,
37-
)
30+
from beets.plugins import BeetsPlugin
31+
from beets.util.id_extractors import extract_release_id
3832

3933
if TYPE_CHECKING:
4034
from collections.abc import Iterator, Sequence
@@ -302,17 +296,6 @@ def _set_date_str(
302296
setattr(info, key, date_num)
303297

304298

305-
def _parse_id(s: str) -> str | None:
306-
"""Search for a MusicBrainz ID in the given string and return it. If
307-
no ID can be found, return None.
308-
"""
309-
# Find the first thing that looks like a UUID/MBID.
310-
match = re.search("[a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12}", s)
311-
if match is not None:
312-
return match.group() if match else None
313-
return None
314-
315-
316299
def _is_translation(r):
317300
_trans_key = "transl-tracklisting"
318301
return r["type"] == _trans_key and r["direction"] == "backward"
@@ -753,24 +736,10 @@ def album_info(self, release: JSONDict) -> beets.autotag.hooks.AlbumInfo:
753736
source.capitalize(),
754737
)
755738

756-
if "discogs" in urls:
757-
info.discogs_albumid = extract_discogs_id_regex(urls["discogs"])
758-
if "bandcamp" in urls:
759-
info.bandcamp_album_id = urls["bandcamp"]
760-
if "spotify" in urls:
761-
info.spotify_album_id = MetadataSourcePlugin._get_id(
762-
"album", urls["spotify"], spotify_id_regex
739+
for source, url in urls.items():
740+
setattr(
741+
info, f"{source}_album_id", extract_release_id(source, url)
763742
)
764-
if "deezer" in urls:
765-
info.deezer_album_id = MetadataSourcePlugin._get_id(
766-
"album", urls["deezer"], deezer_id_regex
767-
)
768-
if "beatport" in urls:
769-
info.beatport_album_id = MetadataSourcePlugin._get_id(
770-
"album", urls["beatport"], beatport_id_regex
771-
)
772-
if "tidal" in urls:
773-
info.tidal_album_id = urls["tidal"].split("/")[-1]
774743

775744
extra_albumdatas = plugins.send("mb_album_extract", data=release)
776745
for extra_albumdata in extra_albumdatas:
@@ -869,10 +838,10 @@ def album_for_id(
869838
MusicBrainzAPIError.
870839
"""
871840
self._log.debug("Requesting MusicBrainz release {}", album_id)
872-
albumid = _parse_id(album_id)
873-
if not albumid:
841+
if not (albumid := extract_release_id("musicbrainz", album_id)):
874842
self._log.debug("Invalid MBID ({0}).", album_id)
875843
return None
844+
876845
try:
877846
res = musicbrainzngs.get_release_by_id(albumid, RELEASE_INCLUDES)
878847

@@ -906,10 +875,10 @@ def track_for_id(
906875
"""Fetches a track by its MusicBrainz ID. Returns a TrackInfo object
907876
or None if no track is found. May raise a MusicBrainzAPIError.
908877
"""
909-
trackid = _parse_id(track_id)
910-
if not trackid:
878+
if not (trackid := extract_release_id("musicbrainz", track_id)):
911879
self._log.debug("Invalid MBID ({0}).", track_id)
912880
return None
881+
913882
try:
914883
res = musicbrainzngs.get_recording_by_id(trackid, TRACK_INCLUDES)
915884
except musicbrainzngs.ResponseError:

0 commit comments

Comments
 (0)