diff --git a/calliope/cli.py b/calliope/cli.py index 16c8df07e1280ab31ea97157a76342ebd4cec4bf..e505b0c1dbe75698df547dcdc931afba63c4bf3e 100644 --- a/calliope/cli.py +++ b/calliope/cli.py @@ -729,7 +729,9 @@ def musicbrainz_cli(context): @click.argument('playlist', type=click.File(mode='r')) @click.option('--output', type=click.File(mode='w'), default='-') @click.option( - '--include', '-i', type=click.Choice(['areas', 'release', 'urls']), multiple=True + '--include', '-i', type=str, multiple=True, + help="Select extra data to include in result. Globs using '*' are allowed. " + "Use `list-includes` to see all possible values." ) @click.option('--update/--no-update', default=False, help='Overwrite track metadata') @click.option( @@ -762,6 +764,14 @@ def cmd_musicbrainz_annotate( calliope.playlist.write(result_generator, output) +@musicbrainz_cli.command(name='list-includes') +@click.pass_context +def cmd_musicbrainz_list_includes(context): + """List all possible values for `--include` option.""" + for item in sorted(calliope.musicbrainz.includes.all_include_key_fullnames()): + sys.stdout.write(f" * {item}\n") + + @musicbrainz_cli.command(name='resolve-image') @click.argument('playlist', type=click.File(mode='r')) @click.option( diff --git a/calliope/musicbrainz/__init__.py b/calliope/musicbrainz/__init__.py index 84578b17ba5ddb3a317de5307fbda35ae98088cf..01798c883c9b87816a2c409106933683274b64b7 100644 --- a/calliope/musicbrainz/__init__.py +++ b/calliope/musicbrainz/__init__.py @@ -41,7 +41,7 @@ import musicbrainzngs import calliope.cache import calliope.config import calliope.playlist -from . import annotate_helpers, resolve +from . import annotate_helpers, includes, resolve from .context import MusicbrainzContext log = logging.getLogger(__name__) @@ -49,10 +49,39 @@ log = logging.getLogger(__name__) def annotate(context: MusicbrainzContext, playlist: calliope.playlist.Playlist, - include: [str], + include_patterns: [str], select_fun=None, update=False): - """Annotate each item in a playlist with metadata from Musicbrainz.""" + """Annotate each item in a playlist with metadata from Musicbrainz. + + The ``include_patterns`` parameter controls what subqueries are run, + via the MusicBrainz `include` query parameter. + + This parameter takes keys like `areas` or `url-rels` which cause more data + to be fetched. MusicBrainz has different resource types, while in Calliope + everything is a :class:`playlist item `, so within + Calliope we specify keys as ``typename.key``. These are examples of + different include key fullnames: + + * ``artist.areas`` + * ``artist.url-rels`` + * ``recording.url-rels`` + + In ``include_patterns`` you can pass literal key names, and you can use + ``*`` as a wildcard. For example, you can get all ``url-rels`` information + with ``*.url-rels``, and all info about an artist with ``artist.*``. + + For reference documentation of the `include` parameter, see: + . + + Use :func:`calliope.includes.all_include_key_fullnames` to retrieve the + full list of include keys. + + """ + + include = includes.expand_fullname_patterns(include_patterns) + log.debug("include: %s", ','.join(include)) + for item in playlist: match = annotate_helpers.search(context, item, select_fun=select_fun) if match is not None: @@ -61,14 +90,7 @@ def annotate(context: MusicbrainzContext, item[key] = v item["calliope.musicbrainz.resolver_score"] = match["_.priority"] - if 'areas' in include: - item = annotate_helpers.add_musicbrainz_artist_areas(context.cache, item) - - if 'release' in include: - item = annotate_helpers.add_musicbrainz_album_release(context.cache, item) - - if 'urls' in include: - item = annotate_helpers.add_musicbrainz_artist_urls(context.cache, item) + item = annotate_helpers.query_data(context.cache, item, include) yield item diff --git a/calliope/musicbrainz/annotate_helpers.py b/calliope/musicbrainz/annotate_helpers.py index 1a8fda86a62f556721d8c7e5364d6ea0f433c88c..381161450302f1ba4da94d68fcd64c01e3435b1a 100644 --- a/calliope/musicbrainz/annotate_helpers.py +++ b/calliope/musicbrainz/annotate_helpers.py @@ -17,9 +17,11 @@ import logging from typing import Callable, Dict, List, Optional, Iterable +import warnings import musicbrainzngs +from . import includes from calliope.playlist import Item from calliope.resolvers import select_best from calliope.utils import ( @@ -32,61 +34,70 @@ from calliope.utils import ( log = logging.getLogger(__name__) -def add_musicbrainz_album_release(cache, item): - if 'musicbrainz.album_id' in item: - album_musicbrainz_id = item['musicbrainz.album_id'] - key = 'release:{}'.format(album_musicbrainz_id) - try: - result = cache.wrap(key, - lambda: musicbrainzngs.get_release_by_id(album_musicbrainz_id)['release']) - except musicbrainzngs.ResponseError as e: - if str(e.cause).startswith('HTTP Error 404'): - item.add_warning('musicbrainz', 'Invalid album ID') - return item - else: - raise - - if 'date' in result: - item['musicbrainz.release_date'] = result['date'] - return item +def _query_data_for_typename(cache, item, typename, mbid, include, query_cb): + includes_to_query = set() + data = {} + def cache_key_for_include(keyname): + return f'{mbid}.{typename}.{keyname}' -def add_musicbrainz_artist_areas(cache, item): - def get_areas(result): - result_main_area = result['artist'].get('area') - if result_main_area: - result_areas = [result_main_area] + for key in include: + result = cache.lookup(cache_key_for_include(key.name)) + if result.found_timestamp is not None: + data[key.name] = result.value else: - result_areas = [] - return result_areas - - if 'musicbrainz.artist_id' in item: - artist_musicbrainz_id = item['musicbrainz.artist_id'] - key = 'creator:{}:areas'.format(artist_musicbrainz_id) - result = cache.wrap(key, - lambda: get_areas( - musicbrainzngs.get_artist_by_id(item['musicbrainz.artist_id'], includes='area-rels'))) - - item_areas = item.get('musicbrainz.creator_areas', []) - for area in result: - item_areas.append(area) - item['musicbrainz.creator_areas'] = item_areas + includes_to_query.add(key) + + if len(includes_to_query) > 0: + response = query_cb(mbid, includes=list(key.name for key in includes_to_query)) + response_data = response[typename] + + for key in includes_to_query: + value = {} + for response_key in key.response_keys: + value[response_key] = response_data.get(response_key, []) + cache.store(cache_key_for_include(key.name), value) + data[key.name] = value + + def attach_data_to_playlist_item(response_keyname, data): + out_typename = typename.replace('-', '_') + out_keyname = response_keyname.replace('-', '_') + + prop = f'musicbrainz.{out_typename}.{out_keyname}' + item[prop] = data + + for response_dict in data.values(): + for response_keyname, data_for_key in response_dict.items(): + attach_data_to_playlist_item(response_keyname, data_for_key) return item -def add_musicbrainz_artist_urls(cache, item): - if 'musicbrainz.artist_id' in item: - artist_musicbrainz_id = item['musicbrainz.artist_id'] - key = 'creator:{}:urls'.format(artist_musicbrainz_id) - result = cache.wrap(key, - lambda: musicbrainzngs.get_artist_by_id( - artist_musicbrainz_id, includes='url-rels')['artist'].get('url-relation-list', [])) +def query_data(cache, item, include: [str]) -> Item: + """Query all MusicBrainz data specified by keys in ``include``.""" + + query_functions = { + 'artist': musicbrainzngs.get_artist_by_id, + 'recording': musicbrainzngs.get_recording_by_id, + 'release': musicbrainzngs.get_release_by_id, + 'release-group': musicbrainzngs.get_release_group_by_id, + 'work': musicbrainzngs.get_work_by_id, + } + + for typename, query_fn in query_functions.items(): + include_keys_for_type = [ + includes.get_key(fullname) + for fullname in include + if fullname.startswith(typename + '.') + ] + + if len(include_keys_for_type) > 0: + typename_underscore = typename.replace('-', '_') + mbid = item.get(f'musicbrainz.{typename_underscore}_id') + if mbid: + item = _query_data_for_typename( + cache, item, typename, mbid, include_keys_for_type, query_fn + ) - item_urls = item.get('musicbrainz.creator_urls', []) - for result_url in result: - item_urls.append( - {'musicbrainz.url.type': result_url['type'], 'musicbrainz.url.target': result_url['target']}) - item['musicbrainz.creator_urls'] = item_urls return item @@ -124,6 +135,7 @@ def _recordings_to_items(mb_recordings: Iterable[Dict]) -> Iterable[Item]: if "length" in mb_recording else None, "musicbrainz.albumartist": mb_release.get("artist-credit-phrase"), + "musicbrainz.release_id": mb_release.get("id"), "musicbrainz.release_group_id": get_nested( mb_release, ("release-group", "id") ), @@ -264,6 +276,7 @@ def search( """ query_kwargs = _build_search_kwargs(item) + log.debug("Running search: %s", query_kwargs) if "title" in item: mb_recordings = context.cache.wrap( str(query_kwargs), @@ -283,7 +296,8 @@ def search( ) candidates = _artists_to_items(mb_artists) else: - raise KeyError + warnings.warn("Did not find 'title', 'album' or 'creator' in item.") + return None candidates = list(candidates) if candidates is None or len(candidates) == 0: diff --git a/calliope/musicbrainz/includes.py b/calliope/musicbrainz/includes.py new file mode 100644 index 0000000000000000000000000000000000000000..649ebc2159b5fb011fbe9fc8c89cc2788a60120c --- /dev/null +++ b/calliope/musicbrainz/includes.py @@ -0,0 +1,280 @@ +# Calliope +# Copyright (C) 2022 Sam Thursfield +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +"""Helpers for filtering data when querying MusicBrainz.""" + + +import dataclasses +from enum import Enum +import re + + +class MBType(Enum): + """Supported MusicBrainz resource types.""" + ARTIST = 0 + RECORDING = 1 + RELEASE = 2 + RELEASE_GROUP = 4 + WORK = 4 + + def typename(self) -> str: + return self.name.lower().replace('_', '-') + + +ALL_MB_TYPES = { + MBType.ARTIST, MBType.RECORDING, MBType.RELEASE, MBType.RELEASE_GROUP, MBType.WORK +} + + +@dataclasses.dataclass +class IncludeKey: + """Include flags used in MusicBrainz API queries. + + Each MusicBrainz API call has an 'include' parameter to control what data + is returned. This class represents each possible 'include' option. + + """ + + # List of MusicBrainz resource types which take this key. + types: set(MBType) + + # Key passed to 'include' in the relevant query function. + name: str + + # Where data is found in the response. + response_keys: [str] + + def applies_to_typename(self, typename) -> bool: + type_enum = getattr(MBType, typename.upper().replace('-', '_')) + return type_enum in self.types + + def fullnames(self) -> [str]: + """List all fully qualified names of this key. + + A fullname is formatted as `typename.keyname`, for example: + + * artist.url-rels + * recording.url-rels + + """ + return [f'{t.typename()}.{self.name}' for t in self.types] + + def outname(self) -> str: + """Name used in Calliope Item keys. + + For example, 'artist-rels' becomes 'artist_rels'. + + """ + return self.name.replace('-', '_') + + def __str__(self): + return self.name + + def __hash__(self): + return hash(self.name) + + +# The include keys were taken from musicbrainzngs 0.7.1 docs: +# https://python-musicbrainzngs.readthedocs.io/en/v0.7.1/api/ +# +# The list and count key names were found by manually inspecting +# the response from the server. +INCLUDE_KEYS = [ + IncludeKey( + ALL_MB_TYPES, + 'aliases', ['alias-list'] + ), + IncludeKey( + ALL_MB_TYPES, + 'annotation', ['annotation-list'] + ), + IncludeKey( + ALL_MB_TYPES, + 'area-rels', ['area-relation-list'] + ), + IncludeKey( + {MBType.RECORDING, MBType.RELEASE_GROUP, MBType.RELEASE}, + 'artist-credits', ['artist-credit', 'artist-credit-phrase'] + ), + IncludeKey( + ALL_MB_TYPES, + 'artist-rels', ['artist-relation-list'] + ), + IncludeKey( + {MBType.RECORDING, MBType.RELEASE_GROUP, MBType.RELEASE, MBType.WORK}, + 'artists', ['artist-list'] + ), + IncludeKey( + {MBType.RECORDING, MBType.RELEASE_GROUP, MBType.RELEASE}, + 'discids', ['discid-list'] + ), + IncludeKey( + ALL_MB_TYPES, + 'event-rels', ['event-relation-list'] + ), + IncludeKey( + ALL_MB_TYPES, + 'instrument-rels', ['instrument-relation-list'] + ), + IncludeKey( + {MBType.ARTIST, MBType.RECORDING, MBType.RELEASE}, + 'isrcs', ['isrc-list'] + ), + IncludeKey( + ALL_MB_TYPES, + 'label-rels', ['label-relation-list'] + ), + IncludeKey( + {MBType.RELEASE}, + 'labels', ['label-list'] + ), + IncludeKey( + {MBType.ARTIST, MBType.RECORDING, MBType.RELEASE_GROUP, MBType.RELEASE}, + 'media', ['media-list'] + ), + IncludeKey( + ALL_MB_TYPES, + 'place-rels', ['place-relation-list'] + ), + IncludeKey( + {MBType.ARTIST, MBType.RECORDING, MBType.RELEASE_GROUP, MBType.WORK}, + 'ratings', ['rating'] + ), + IncludeKey( + {MBType.RELEASE}, + 'recording-level-rels', ['recording-level-relation-list'] + ), + IncludeKey( + ALL_MB_TYPES - {MBType.RECORDING}, + 'recording-rels', ['recording-relation-list'] + ), + IncludeKey( + {MBType.ARTIST, MBType.RELEASE}, + 'recordings', ['recording-list', 'recording-count'] + ), + IncludeKey( + ALL_MB_TYPES, + 'release-group-rels', ['release-group-relation-list'] + ), + IncludeKey( + {MBType.ARTIST, MBType.RELEASE}, + 'release-groups', ['release-group-list'] + ), + IncludeKey( + ALL_MB_TYPES, + 'release-rels', ['release-relation-list'] + ), + IncludeKey( + {MBType.ARTIST, MBType.RECORDING, MBType.RELEASE_GROUP}, + 'releases', ['release-list', 'release-count'] + ), + IncludeKey( + ALL_MB_TYPES, + 'series-rels', ['series-relation-list'] + ), + IncludeKey( + ALL_MB_TYPES, + 'tags', ['tag-list'] + ), + IncludeKey( + ALL_MB_TYPES, + 'url-rels', ['url-relation-list'] + ), + IncludeKey( + {MBType.RECORDING, MBType.RELEASE}, + 'work-level-rels', ['work-level-relation-list'] + ), + IncludeKey( + ALL_MB_TYPES, + 'work-rels', ['work-relation-list'] + ), + IncludeKey( + {MBType.ARTIST}, + 'works', ['work-list', 'work-count'] + ), + # Not sure about this one. + #'various-artists', + # Avoid keys which require user authentication. + # 'user-ratings', 'user-tags' +] + + +__key_fullname_map = None +def _key_fullname_map() -> [str,IncludeKey]: + global __key_fullname_map # pylint: disable=global-statement + if __key_fullname_map is None: + __key_fullname_map = {} + for key in INCLUDE_KEYS: + for fullname in key.fullnames(): + __key_fullname_map[fullname] = key + return __key_fullname_map + + +def all_include_key_fullnames() -> set: + return set(_key_fullname_map().keys()) + + +def typenames() -> [str]: + return [t.typename() for t in ALL_MB_TYPES] + + +def _pattern_to_re(pattern) -> re.Pattern: + validate_re = re.compile('[^a-z-.*]') + match = validate_re.search(pattern) + if match: + raise RuntimeError( + f"Invalid character '{match.group(0)}' found in include pattern." + ) + + pattern_safe = pattern.replace('.', r'\.') + code = '^' + pattern_safe.replace('*', '[a-z-.]+') + '$' + return re.compile(code) + + +def get_key(fullname) -> IncludeKey: + return _key_fullname_map()[fullname] + + +def expand_fullname_patterns(patterns: [str]) -> [str]: + """Helper for tools which accept glob patterns for include-keys. + + This allows commandline users to specify "artist.*" rather than listing + all artist include-keys explicitly. + + """ + result = set() + + def pattern_matchers(): + """Generate a match callback for each input pattern.""" + for pattern in patterns: + # Python users often prefer _ over - as separator, so allow both. + pattern = pattern.replace('_', '-') + + if '*' in pattern: + pattern_re = _pattern_to_re(pattern) + yield pattern_re.match + else: + yield pattern.__eq__ + + matchers = list(pattern_matchers()) + for key in INCLUDE_KEYS: + for fullname in key.fullnames(): + for matcher in matchers: + if matcher(fullname): + result.add(fullname) + + return result diff --git a/calliope/musicbrainz/meson.build b/calliope/musicbrainz/meson.build index 2e5b36a4a863e7a57adcc0c66e92fa10eb357d97..c1745f5a21bc7ea1e99d3d9953813faba6706292 100644 --- a/calliope/musicbrainz/meson.build +++ b/calliope/musicbrainz/meson.build @@ -2,6 +2,7 @@ sources = files( '__init__.py', 'annotate_helpers.py', 'context.py', + 'includes.py', 'resolve.py', ) diff --git a/docs/changelog.rst b/docs/changelog.rst index e3ea7502a9e481e4d8eeba29bfc39d8498f067dc..9b923bfdcf7ce4b2761a641a443dcfa87194e661 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,15 @@ Changelog Unreleased ---------- + * musicbrainz: The ``annotate --include`` option now supports all values that + the `MusicBrainz API `_ + supports. + + * See the full list of keys with ``cpe musicbrainz list-includes`` + * Use ``*`` to select many keys, e.g. ``--include=artist.*`` to select + all data related to the artist. + + :mr:`200` * cli: Quiet 'unhandled attribute' warnings from 'musicbrainzngs' library when `-v 3`. :mr:`199` diff --git a/docs/meson.build b/docs/meson.build index 8289ffd500f1b6dbf9e0409f1e228daa1bea6d8a..a6c87bedd7b806f5992ed24e99b5308588085b88 100644 --- a/docs/meson.build +++ b/docs/meson.build @@ -1,6 +1,7 @@ sphinx_sources = [ 'conf.py', 'basics.rst', + 'changelog.rst', 'index.rst', 'format.rst', 'reference-cli.rst', diff --git a/tests/test_musicbrainz.py b/tests/test_musicbrainz.py index 35c744fb4ed5e661defb04c6661e49a24fa41e18..3eea968b4ad0c2204760c09cc300071783edf65d 100644 --- a/tests/test_musicbrainz.py +++ b/tests/test_musicbrainz.py @@ -113,6 +113,11 @@ MB = r"https://musicbrainz.org/ws/2" COVERART = r"https://coverartarchive.org" +def test_list_includes(cli): + result = cli.run(["list-includes"]) + result.assert_success() + + def test_resolve_artist(cli, mock_server): mock_server( [ @@ -177,6 +182,17 @@ def test_resolve_album(cli, mock_server): ] +def test_resolve_invalid(cli, mock_server): + mock_server([(".*", error_404())]) + + playlist = [{"nonsense": "nothing"}] + + with pytest.warns(UserWarning): + result = cli.run(["annotate", "--output", "-", "-"], input_playlist=playlist) + result.assert_success() + + + def test_resolve_track(cli, mock_server): mock_server( [ @@ -216,6 +232,7 @@ def test_resolve_track(cli, mock_server): "musicbrainz.isrcs": ["GBBKS1400215"], "musicbrainz.length": 186436.0, "musicbrainz.recording_id": "13b18d21-9600-419c-888e-99c9c5e1d9a3", + "musicbrainz.release_id": "59c00320-db1c-4f45-b8ff-889281b544a1", "musicbrainz.release_group_id": "08c5a214-0628-4257-837d-f6e56a043631", "musicbrainz.title": "The Light", "title": "The Light",