From 5b6b70e9549a6e4e4ca62bfc04556e39f0702821 Mon Sep 17 00:00:00 2001 From: Sam Thursfield Date: Fri, 20 May 2022 18:52:54 +0200 Subject: [PATCH 1/5] musicbrainz: Include `musicbrainz.release_id` in recording search Previously we included release_group_id but not release_id. --- calliope/musicbrainz/annotate_helpers.py | 1 + tests/test_musicbrainz.py | 1 + 2 files changed, 2 insertions(+) diff --git a/calliope/musicbrainz/annotate_helpers.py b/calliope/musicbrainz/annotate_helpers.py index 1a8fda8..f18f1e3 100644 --- a/calliope/musicbrainz/annotate_helpers.py +++ b/calliope/musicbrainz/annotate_helpers.py @@ -124,6 +124,7 @@ def _recordings_to_items(mb_recordings: Iterable[Dict]) -> Iterable[Item]: if "length" in mb_recording else None, "musicbrainz.albumartist": mb_release.get("artist-credit-phrase"), + "musicbrainz.release_id": mb_release.get("id"), "musicbrainz.release_group_id": get_nested( mb_release, ("release-group", "id") ), diff --git a/tests/test_musicbrainz.py b/tests/test_musicbrainz.py index 35c744f..15cc8d3 100644 --- a/tests/test_musicbrainz.py +++ b/tests/test_musicbrainz.py @@ -216,6 +216,7 @@ def test_resolve_track(cli, mock_server): "musicbrainz.isrcs": ["GBBKS1400215"], "musicbrainz.length": 186436.0, "musicbrainz.recording_id": "13b18d21-9600-419c-888e-99c9c5e1d9a3", + "musicbrainz.release_id": "59c00320-db1c-4f45-b8ff-889281b544a1", "musicbrainz.release_group_id": "08c5a214-0628-4257-837d-f6e56a043631", "musicbrainz.title": "The Light", "title": "The Light", -- GitLab From 5c52f523c329e60070ab8fa673325d45a96271c9 Mon Sep 17 00:00:00 2001 From: Sam Thursfield Date: Thu, 19 May 2022 17:31:58 +0200 Subject: [PATCH 2/5] musicbrainz: Warn when there's no way to annotate an item Previously the process would crash with a KeyError(). --- calliope/musicbrainz/annotate_helpers.py | 5 ++++- tests/test_musicbrainz.py | 11 +++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/calliope/musicbrainz/annotate_helpers.py b/calliope/musicbrainz/annotate_helpers.py index f18f1e3..1658d30 100644 --- a/calliope/musicbrainz/annotate_helpers.py +++ b/calliope/musicbrainz/annotate_helpers.py @@ -17,6 +17,7 @@ import logging from typing import Callable, Dict, List, Optional, Iterable +import warnings import musicbrainzngs @@ -265,6 +266,7 @@ def search( """ query_kwargs = _build_search_kwargs(item) + log.debug("Running search: %s", query_kwargs) if "title" in item: mb_recordings = context.cache.wrap( str(query_kwargs), @@ -284,7 +286,8 @@ def search( ) candidates = _artists_to_items(mb_artists) else: - raise KeyError + warnings.warn("Did not find 'title', 'album' or 'creator' in item.") + return None candidates = list(candidates) if candidates is None or len(candidates) == 0: diff --git a/tests/test_musicbrainz.py b/tests/test_musicbrainz.py index 15cc8d3..1f656b6 100644 --- a/tests/test_musicbrainz.py +++ b/tests/test_musicbrainz.py @@ -177,6 +177,17 @@ def test_resolve_album(cli, mock_server): ] +def test_resolve_invalid(cli, mock_server): + mock_server([(".*", error_404())]) + + playlist = [{"nonsense": "nothing"}] + + with pytest.warns(UserWarning): + result = cli.run(["annotate", "--output", "-", "-"], input_playlist=playlist) + result.assert_success() + + + def test_resolve_track(cli, mock_server): mock_server( [ -- GitLab From 43eeecaab9e4abf02c3c2e4be3dbb32110a89f13 Mon Sep 17 00:00:00 2001 From: Sam Thursfield Date: Fri, 20 May 2022 17:59:22 +0200 Subject: [PATCH 3/5] musicbrainz: Change how `--include` option works Allow all values that MusicBrainz itself allows, and return exactly what MusicBrainz returns. There's no point redefining this in our own way, at least not yet. Some examples. Query all data about an artist: cpe musicbrainz annotate --include='artist.*' - Query all URLs for a release: cpe musicbrainz annotate --include='*.url-rels' - --- calliope/cli.py | 12 +- calliope/musicbrainz/__init__.py | 44 +++- calliope/musicbrainz/annotate_helpers.py | 106 +++++---- calliope/musicbrainz/includes.py | 280 +++++++++++++++++++++++ calliope/musicbrainz/meson.build | 1 + tests/test_musicbrainz.py | 5 + 6 files changed, 388 insertions(+), 60 deletions(-) create mode 100644 calliope/musicbrainz/includes.py diff --git a/calliope/cli.py b/calliope/cli.py index 16c8df0..e505b0c 100644 --- a/calliope/cli.py +++ b/calliope/cli.py @@ -729,7 +729,9 @@ def musicbrainz_cli(context): @click.argument('playlist', type=click.File(mode='r')) @click.option('--output', type=click.File(mode='w'), default='-') @click.option( - '--include', '-i', type=click.Choice(['areas', 'release', 'urls']), multiple=True + '--include', '-i', type=str, multiple=True, + help="Select extra data to include in result. Globs using '*' are allowed. " + "Use `list-includes` to see all possible values." ) @click.option('--update/--no-update', default=False, help='Overwrite track metadata') @click.option( @@ -762,6 +764,14 @@ def cmd_musicbrainz_annotate( calliope.playlist.write(result_generator, output) +@musicbrainz_cli.command(name='list-includes') +@click.pass_context +def cmd_musicbrainz_list_includes(context): + """List all possible values for `--include` option.""" + for item in sorted(calliope.musicbrainz.includes.all_include_key_fullnames()): + sys.stdout.write(f" * {item}\n") + + @musicbrainz_cli.command(name='resolve-image') @click.argument('playlist', type=click.File(mode='r')) @click.option( diff --git a/calliope/musicbrainz/__init__.py b/calliope/musicbrainz/__init__.py index 84578b1..01798c8 100644 --- a/calliope/musicbrainz/__init__.py +++ b/calliope/musicbrainz/__init__.py @@ -41,7 +41,7 @@ import musicbrainzngs import calliope.cache import calliope.config import calliope.playlist -from . import annotate_helpers, resolve +from . import annotate_helpers, includes, resolve from .context import MusicbrainzContext log = logging.getLogger(__name__) @@ -49,10 +49,39 @@ log = logging.getLogger(__name__) def annotate(context: MusicbrainzContext, playlist: calliope.playlist.Playlist, - include: [str], + include_patterns: [str], select_fun=None, update=False): - """Annotate each item in a playlist with metadata from Musicbrainz.""" + """Annotate each item in a playlist with metadata from Musicbrainz. + + The ``include_patterns`` parameter controls what subqueries are run, + via the MusicBrainz `include` query parameter. + + This parameter takes keys like `areas` or `url-rels` which cause more data + to be fetched. MusicBrainz has different resource types, while in Calliope + everything is a :class:`playlist item `, so within + Calliope we specify keys as ``typename.key``. These are examples of + different include key fullnames: + + * ``artist.areas`` + * ``artist.url-rels`` + * ``recording.url-rels`` + + In ``include_patterns`` you can pass literal key names, and you can use + ``*`` as a wildcard. For example, you can get all ``url-rels`` information + with ``*.url-rels``, and all info about an artist with ``artist.*``. + + For reference documentation of the `include` parameter, see: + . + + Use :func:`calliope.includes.all_include_key_fullnames` to retrieve the + full list of include keys. + + """ + + include = includes.expand_fullname_patterns(include_patterns) + log.debug("include: %s", ','.join(include)) + for item in playlist: match = annotate_helpers.search(context, item, select_fun=select_fun) if match is not None: @@ -61,14 +90,7 @@ def annotate(context: MusicbrainzContext, item[key] = v item["calliope.musicbrainz.resolver_score"] = match["_.priority"] - if 'areas' in include: - item = annotate_helpers.add_musicbrainz_artist_areas(context.cache, item) - - if 'release' in include: - item = annotate_helpers.add_musicbrainz_album_release(context.cache, item) - - if 'urls' in include: - item = annotate_helpers.add_musicbrainz_artist_urls(context.cache, item) + item = annotate_helpers.query_data(context.cache, item, include) yield item diff --git a/calliope/musicbrainz/annotate_helpers.py b/calliope/musicbrainz/annotate_helpers.py index 1658d30..3811614 100644 --- a/calliope/musicbrainz/annotate_helpers.py +++ b/calliope/musicbrainz/annotate_helpers.py @@ -21,6 +21,7 @@ import warnings import musicbrainzngs +from . import includes from calliope.playlist import Item from calliope.resolvers import select_best from calliope.utils import ( @@ -33,61 +34,70 @@ from calliope.utils import ( log = logging.getLogger(__name__) -def add_musicbrainz_album_release(cache, item): - if 'musicbrainz.album_id' in item: - album_musicbrainz_id = item['musicbrainz.album_id'] - key = 'release:{}'.format(album_musicbrainz_id) - try: - result = cache.wrap(key, - lambda: musicbrainzngs.get_release_by_id(album_musicbrainz_id)['release']) - except musicbrainzngs.ResponseError as e: - if str(e.cause).startswith('HTTP Error 404'): - item.add_warning('musicbrainz', 'Invalid album ID') - return item - else: - raise - - if 'date' in result: - item['musicbrainz.release_date'] = result['date'] - return item +def _query_data_for_typename(cache, item, typename, mbid, include, query_cb): + includes_to_query = set() + data = {} + def cache_key_for_include(keyname): + return f'{mbid}.{typename}.{keyname}' -def add_musicbrainz_artist_areas(cache, item): - def get_areas(result): - result_main_area = result['artist'].get('area') - if result_main_area: - result_areas = [result_main_area] + for key in include: + result = cache.lookup(cache_key_for_include(key.name)) + if result.found_timestamp is not None: + data[key.name] = result.value else: - result_areas = [] - return result_areas - - if 'musicbrainz.artist_id' in item: - artist_musicbrainz_id = item['musicbrainz.artist_id'] - key = 'creator:{}:areas'.format(artist_musicbrainz_id) - result = cache.wrap(key, - lambda: get_areas( - musicbrainzngs.get_artist_by_id(item['musicbrainz.artist_id'], includes='area-rels'))) - - item_areas = item.get('musicbrainz.creator_areas', []) - for area in result: - item_areas.append(area) - item['musicbrainz.creator_areas'] = item_areas + includes_to_query.add(key) + + if len(includes_to_query) > 0: + response = query_cb(mbid, includes=list(key.name for key in includes_to_query)) + response_data = response[typename] + + for key in includes_to_query: + value = {} + for response_key in key.response_keys: + value[response_key] = response_data.get(response_key, []) + cache.store(cache_key_for_include(key.name), value) + data[key.name] = value + + def attach_data_to_playlist_item(response_keyname, data): + out_typename = typename.replace('-', '_') + out_keyname = response_keyname.replace('-', '_') + + prop = f'musicbrainz.{out_typename}.{out_keyname}' + item[prop] = data + + for response_dict in data.values(): + for response_keyname, data_for_key in response_dict.items(): + attach_data_to_playlist_item(response_keyname, data_for_key) return item -def add_musicbrainz_artist_urls(cache, item): - if 'musicbrainz.artist_id' in item: - artist_musicbrainz_id = item['musicbrainz.artist_id'] - key = 'creator:{}:urls'.format(artist_musicbrainz_id) - result = cache.wrap(key, - lambda: musicbrainzngs.get_artist_by_id( - artist_musicbrainz_id, includes='url-rels')['artist'].get('url-relation-list', [])) +def query_data(cache, item, include: [str]) -> Item: + """Query all MusicBrainz data specified by keys in ``include``.""" + + query_functions = { + 'artist': musicbrainzngs.get_artist_by_id, + 'recording': musicbrainzngs.get_recording_by_id, + 'release': musicbrainzngs.get_release_by_id, + 'release-group': musicbrainzngs.get_release_group_by_id, + 'work': musicbrainzngs.get_work_by_id, + } + + for typename, query_fn in query_functions.items(): + include_keys_for_type = [ + includes.get_key(fullname) + for fullname in include + if fullname.startswith(typename + '.') + ] + + if len(include_keys_for_type) > 0: + typename_underscore = typename.replace('-', '_') + mbid = item.get(f'musicbrainz.{typename_underscore}_id') + if mbid: + item = _query_data_for_typename( + cache, item, typename, mbid, include_keys_for_type, query_fn + ) - item_urls = item.get('musicbrainz.creator_urls', []) - for result_url in result: - item_urls.append( - {'musicbrainz.url.type': result_url['type'], 'musicbrainz.url.target': result_url['target']}) - item['musicbrainz.creator_urls'] = item_urls return item diff --git a/calliope/musicbrainz/includes.py b/calliope/musicbrainz/includes.py new file mode 100644 index 0000000..649ebc2 --- /dev/null +++ b/calliope/musicbrainz/includes.py @@ -0,0 +1,280 @@ +# Calliope +# Copyright (C) 2022 Sam Thursfield +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +"""Helpers for filtering data when querying MusicBrainz.""" + + +import dataclasses +from enum import Enum +import re + + +class MBType(Enum): + """Supported MusicBrainz resource types.""" + ARTIST = 0 + RECORDING = 1 + RELEASE = 2 + RELEASE_GROUP = 4 + WORK = 4 + + def typename(self) -> str: + return self.name.lower().replace('_', '-') + + +ALL_MB_TYPES = { + MBType.ARTIST, MBType.RECORDING, MBType.RELEASE, MBType.RELEASE_GROUP, MBType.WORK +} + + +@dataclasses.dataclass +class IncludeKey: + """Include flags used in MusicBrainz API queries. + + Each MusicBrainz API call has an 'include' parameter to control what data + is returned. This class represents each possible 'include' option. + + """ + + # List of MusicBrainz resource types which take this key. + types: set(MBType) + + # Key passed to 'include' in the relevant query function. + name: str + + # Where data is found in the response. + response_keys: [str] + + def applies_to_typename(self, typename) -> bool: + type_enum = getattr(MBType, typename.upper().replace('-', '_')) + return type_enum in self.types + + def fullnames(self) -> [str]: + """List all fully qualified names of this key. + + A fullname is formatted as `typename.keyname`, for example: + + * artist.url-rels + * recording.url-rels + + """ + return [f'{t.typename()}.{self.name}' for t in self.types] + + def outname(self) -> str: + """Name used in Calliope Item keys. + + For example, 'artist-rels' becomes 'artist_rels'. + + """ + return self.name.replace('-', '_') + + def __str__(self): + return self.name + + def __hash__(self): + return hash(self.name) + + +# The include keys were taken from musicbrainzngs 0.7.1 docs: +# https://python-musicbrainzngs.readthedocs.io/en/v0.7.1/api/ +# +# The list and count key names were found by manually inspecting +# the response from the server. +INCLUDE_KEYS = [ + IncludeKey( + ALL_MB_TYPES, + 'aliases', ['alias-list'] + ), + IncludeKey( + ALL_MB_TYPES, + 'annotation', ['annotation-list'] + ), + IncludeKey( + ALL_MB_TYPES, + 'area-rels', ['area-relation-list'] + ), + IncludeKey( + {MBType.RECORDING, MBType.RELEASE_GROUP, MBType.RELEASE}, + 'artist-credits', ['artist-credit', 'artist-credit-phrase'] + ), + IncludeKey( + ALL_MB_TYPES, + 'artist-rels', ['artist-relation-list'] + ), + IncludeKey( + {MBType.RECORDING, MBType.RELEASE_GROUP, MBType.RELEASE, MBType.WORK}, + 'artists', ['artist-list'] + ), + IncludeKey( + {MBType.RECORDING, MBType.RELEASE_GROUP, MBType.RELEASE}, + 'discids', ['discid-list'] + ), + IncludeKey( + ALL_MB_TYPES, + 'event-rels', ['event-relation-list'] + ), + IncludeKey( + ALL_MB_TYPES, + 'instrument-rels', ['instrument-relation-list'] + ), + IncludeKey( + {MBType.ARTIST, MBType.RECORDING, MBType.RELEASE}, + 'isrcs', ['isrc-list'] + ), + IncludeKey( + ALL_MB_TYPES, + 'label-rels', ['label-relation-list'] + ), + IncludeKey( + {MBType.RELEASE}, + 'labels', ['label-list'] + ), + IncludeKey( + {MBType.ARTIST, MBType.RECORDING, MBType.RELEASE_GROUP, MBType.RELEASE}, + 'media', ['media-list'] + ), + IncludeKey( + ALL_MB_TYPES, + 'place-rels', ['place-relation-list'] + ), + IncludeKey( + {MBType.ARTIST, MBType.RECORDING, MBType.RELEASE_GROUP, MBType.WORK}, + 'ratings', ['rating'] + ), + IncludeKey( + {MBType.RELEASE}, + 'recording-level-rels', ['recording-level-relation-list'] + ), + IncludeKey( + ALL_MB_TYPES - {MBType.RECORDING}, + 'recording-rels', ['recording-relation-list'] + ), + IncludeKey( + {MBType.ARTIST, MBType.RELEASE}, + 'recordings', ['recording-list', 'recording-count'] + ), + IncludeKey( + ALL_MB_TYPES, + 'release-group-rels', ['release-group-relation-list'] + ), + IncludeKey( + {MBType.ARTIST, MBType.RELEASE}, + 'release-groups', ['release-group-list'] + ), + IncludeKey( + ALL_MB_TYPES, + 'release-rels', ['release-relation-list'] + ), + IncludeKey( + {MBType.ARTIST, MBType.RECORDING, MBType.RELEASE_GROUP}, + 'releases', ['release-list', 'release-count'] + ), + IncludeKey( + ALL_MB_TYPES, + 'series-rels', ['series-relation-list'] + ), + IncludeKey( + ALL_MB_TYPES, + 'tags', ['tag-list'] + ), + IncludeKey( + ALL_MB_TYPES, + 'url-rels', ['url-relation-list'] + ), + IncludeKey( + {MBType.RECORDING, MBType.RELEASE}, + 'work-level-rels', ['work-level-relation-list'] + ), + IncludeKey( + ALL_MB_TYPES, + 'work-rels', ['work-relation-list'] + ), + IncludeKey( + {MBType.ARTIST}, + 'works', ['work-list', 'work-count'] + ), + # Not sure about this one. + #'various-artists', + # Avoid keys which require user authentication. + # 'user-ratings', 'user-tags' +] + + +__key_fullname_map = None +def _key_fullname_map() -> [str,IncludeKey]: + global __key_fullname_map # pylint: disable=global-statement + if __key_fullname_map is None: + __key_fullname_map = {} + for key in INCLUDE_KEYS: + for fullname in key.fullnames(): + __key_fullname_map[fullname] = key + return __key_fullname_map + + +def all_include_key_fullnames() -> set: + return set(_key_fullname_map().keys()) + + +def typenames() -> [str]: + return [t.typename() for t in ALL_MB_TYPES] + + +def _pattern_to_re(pattern) -> re.Pattern: + validate_re = re.compile('[^a-z-.*]') + match = validate_re.search(pattern) + if match: + raise RuntimeError( + f"Invalid character '{match.group(0)}' found in include pattern." + ) + + pattern_safe = pattern.replace('.', r'\.') + code = '^' + pattern_safe.replace('*', '[a-z-.]+') + '$' + return re.compile(code) + + +def get_key(fullname) -> IncludeKey: + return _key_fullname_map()[fullname] + + +def expand_fullname_patterns(patterns: [str]) -> [str]: + """Helper for tools which accept glob patterns for include-keys. + + This allows commandline users to specify "artist.*" rather than listing + all artist include-keys explicitly. + + """ + result = set() + + def pattern_matchers(): + """Generate a match callback for each input pattern.""" + for pattern in patterns: + # Python users often prefer _ over - as separator, so allow both. + pattern = pattern.replace('_', '-') + + if '*' in pattern: + pattern_re = _pattern_to_re(pattern) + yield pattern_re.match + else: + yield pattern.__eq__ + + matchers = list(pattern_matchers()) + for key in INCLUDE_KEYS: + for fullname in key.fullnames(): + for matcher in matchers: + if matcher(fullname): + result.add(fullname) + + return result diff --git a/calliope/musicbrainz/meson.build b/calliope/musicbrainz/meson.build index 2e5b36a..c1745f5 100644 --- a/calliope/musicbrainz/meson.build +++ b/calliope/musicbrainz/meson.build @@ -2,6 +2,7 @@ sources = files( '__init__.py', 'annotate_helpers.py', 'context.py', + 'includes.py', 'resolve.py', ) diff --git a/tests/test_musicbrainz.py b/tests/test_musicbrainz.py index 1f656b6..3eea968 100644 --- a/tests/test_musicbrainz.py +++ b/tests/test_musicbrainz.py @@ -113,6 +113,11 @@ MB = r"https://musicbrainz.org/ws/2" COVERART = r"https://coverartarchive.org" +def test_list_includes(cli): + result = cli.run(["list-includes"]) + result.assert_success() + + def test_resolve_artist(cli, mock_server): mock_server( [ -- GitLab From 763878bfb3efc1225fa8bdbcbef9c0c267b4834f Mon Sep 17 00:00:00 2001 From: Sam Thursfield Date: Fri, 20 May 2022 19:10:58 +0200 Subject: [PATCH 4/5] Update changelog --- docs/changelog.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index e3ea750..9b923bf 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,15 @@ Changelog Unreleased ---------- + * musicbrainz: The ``annotate --include`` option now supports all values that + the `MusicBrainz API `_ + supports. + + * See the full list of keys with ``cpe musicbrainz list-includes`` + * Use ``*`` to select many keys, e.g. ``--include=artist.*`` to select + all data related to the artist. + + :mr:`200` * cli: Quiet 'unhandled attribute' warnings from 'musicbrainzngs' library when `-v 3`. :mr:`199` -- GitLab From f6c268dcb10b78f4c3ba573e8bd2e84347e5dd36 Mon Sep 17 00:00:00 2001 From: Sam Thursfield Date: Fri, 20 May 2022 21:27:51 +0200 Subject: [PATCH 5/5] build: Add changelog.rst to doc dependencies --- docs/meson.build | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/meson.build b/docs/meson.build index 8289ffd..a6c87be 100644 --- a/docs/meson.build +++ b/docs/meson.build @@ -1,6 +1,7 @@ sphinx_sources = [ 'conf.py', 'basics.rst', + 'changelog.rst', 'index.rst', 'format.rst', 'reference-cli.rst', -- GitLab