diff --git a/beetsplug/fromfilename.py b/beetsplug/fromfilename.py index c3fb4bc6bf..82c3d444cb 100644 --- a/beetsplug/fromfilename.py +++ b/beetsplug/fromfilename.py @@ -1,5 +1,5 @@ # This file is part of beets. -# Copyright 2016, Jan-Erik Dahlin +# Copyright 2016, Jan-Erik Dahlin, Henry Oberholtzer. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -16,153 +16,566 @@ (possibly also extract track and artist) """ -import os +from __future__ import annotations + import re +from collections.abc import Iterator, MutableMapping, ValuesView +from datetime import datetime +from functools import cached_property +from pathlib import Path +from typing import TYPE_CHECKING -from beets import plugins +from beets import config +from beets.plugins import BeetsPlugin from beets.util import displayable_path -# Filename field extraction patterns. -PATTERNS = [ - # Useful patterns. - ( - r"^(?P\d+)\.?\s*-\s*(?P.+?)\s*-\s*(?P.+?)" - r"(\s*-\s*(?P<tag>.*))?$" - ), - r"^(?P<artist>.+?)\s*-\s*(?P<title>.+?)(\s*-\s*(?P<tag>.*))?$", - r"^(?P<track>\d+)\.?[\s_-]+(?P<title>.+)$", - r"^(?P<title>.+) by (?P<artist>.+)$", - r"^(?P<track>\d+).*$", - r"^(?P<title>.+)$", -] - -# Titles considered "empty" and in need of replacement. -BAD_TITLE_PATTERNS = [ - r"^$", -] - - -def equal(seq): - """Determine whether a sequence holds identical elements.""" - return len(set(seq)) <= 1 - - -def equal_fields(matchdict, field): - """Do all items in `matchdict`, whose values are dictionaries, have - the same value for `field`? (If they do, the field is probably not - the title.) - """ - return equal(m[field] for m in matchdict.values()) - - -def all_matches(names, pattern): - """If all the filenames in the item/filename mapping match the - pattern, return a dictionary mapping the items to dictionaries - giving the value for each named subpattern in the match. Otherwise, - return None. 
if TYPE_CHECKING:
    from beets.importer import ImportSession, ImportTask
    from beets.library import Item

# Filename field extraction pattern. Every group is optional, so this
# pattern matches any string (possibly with all groups unset).
RE_TRACK_INFO = re.compile(
    r"""
    (?P<disc>\d+(?=[\.\-_]\d))?
        # a disc must be followed by punctuation and a digit
    [\.\-]{,1}
        # disc punctuation
    (?P<track>\d+)?
        # match the track number
    [\.\-_\s]*
        # artist separators
    (?P<artist>.+?(?=[\s_]*?[\.\-]|by.+))?
        # artist match depends on title existing
    [\.\-_\s]*
    (?P<by>by)?
        # if 'by' is found, artist and title will need to be swapped
    [\.\-_\s]*
        # title separators
    (?P<title>.+)?
        # match the track title
    """,
    re.VERBOSE | re.IGNORECASE,
)

# Vinyl-style index at the start of a filename: one or two capital
# letters followed by up to two digits ("A1", "B2", "AA")
RE_ALPHANUM_INDEX = re.compile(r"^[A-Z]{1,2}\d{,2}\b")

# Catalog number extraction pattern
RE_CATALOGNUM = re.compile(
    r"""
    [\(\[\{]
        # starts with a bracket
    (?!flac|mp3|wav)
        # does not start with file format
    (?P<catalognum>[\w\s]+)
        # actual catalog number
    (?<!flac|.mp3|.wav)
        # does not end with file format; the leading "." pads the three
        # letter formats so every look-behind branch is four characters
    [\)\]\}]
        # ends with a bracket
    """,
    re.VERBOSE | re.IGNORECASE,
)

# Opening of a named group, "(?P<name>"
RE_NAMED_SUBGROUP = re.compile(r"\(\?P\<\w+\>")

# Matches fields that are empty or only whitespace
RE_BAD_FIELD = re.compile(r"^\s*$")

# First priority for matching a year is a year surrounded
# by brackets, dashes, or punctuation
RE_YEAR_BRACKETED = re.compile(
    r"[\(\[\{\-\_]\s*(?P<year>\d{4}).*?[\)\]\}\-\_,]"
)

# Look for a year at the start
RE_YEAR_START = re.compile(r"^(?P<year>\d{4})")

# Look for a year at the end. The end anchor has to follow the digits:
# placed in front of them it can never be followed by four characters,
# so the pattern would never match anything.
RE_YEAR_END = re.compile(r"(?P<year>\d{4})$")

# Just look for four digits
RE_YEAR_ANY = re.compile(r"(?P<year>\d{4})")

# All year regexp in order of preference
YEAR_REGEX = [RE_YEAR_BRACKETED, RE_YEAR_START, RE_YEAR_END, RE_YEAR_ANY]

# A media type named anywhere inside a pair of brackets
RE_MEDIA_TYPE = re.compile(
    r"""
    [\(\[\{].*?
    ((?P<vinyl>vinyl)|
    (?P<cd>cd)|
    (?P<web>web)|
    (?P<cassette>cassette))
    .*?[\)\]\}]
    """,
    re.VERBOSE | re.IGNORECASE,
)

# "VA", "Various" or "Various Artists". The pattern is unanchored, so
# use ``fullmatch`` to test whether a whole field is one of these.
RE_VARIOUS = re.compile(r"va(rious)?(\sartists)?", re.IGNORECASE)
- for item in d: - if title_field and bad_title(item.title): - item.title = str(d[item][title_field]) - log.info("Title replaced with: {.title}", item) +RE_SPLIT = re.compile(r"[\-\_]+") - if "track" in d[item] and item.track == 0: - item.track = int(d[item]["track"]) - log.info("Track replaced with: {.track}", item) +RE_BRACKETS = re.compile(r"[\(\[\{].*?[\)\]\}]") -# Plugin structure and hook into import process. +class FilenameMatch(MutableMapping[str, str | None]): + def __init__(self, matches: dict[str, str] | None = None) -> None: + self._matches: dict[str, str] = {} + if matches: + for key, value in matches.items(): + if value is not None: + self._matches[key.lower()] = str(value).strip() + def __getitem__(self, key: str) -> str | None: + return self._matches.get(key, None) -class FromFilenamePlugin(plugins.BeetsPlugin): - def __init__(self): + def __iter__(self) -> Iterator[str]: + return iter(self._matches) + + def __len__(self) -> int: + return len(self._matches) + + def __setitem__(self, key: str, value: str | None) -> None: + if value: + self._matches[key] = value.strip() + + def __delitem__(self, key: str) -> None: + del self._matches[key] + + def values(self) -> ValuesView[str | None]: + return self._matches.values() + + +class FromFilenamePlugin(BeetsPlugin): + def __init__(self) -> None: super().__init__() + self.config.add( + { + "fields": [ + "artist", + "album", + "albumartist", + "catalognum", + "disc", + "media", + "title", + "track", + "year", + ], + "patterns": {"folder": [], "file": []}, + "ignore_dirs": [], + "guess": {"folder": True, "file": True}, + } + ) + self.fields = set(self.config["fields"].as_str_seq()) + self.file_patterns = self._user_pattern_to_regex( + self.config["patterns"]["file"].as_str_seq() + ) + self.folder_patterns = self._user_pattern_to_regex( + self.config["patterns"]["folder"].as_str_seq() + ) + self.session_fields: set[str] = set() self.register_listener("import_task_start", self.filename_task) - def 
class FromFilenamePlugin(BeetsPlugin):
    """Guess missing metadata from file and folder names during import.

    When an import task starts, fields that are empty on at least one
    item are guessed from each item's filename and from the name of the
    parent folder, and the guesses are written to the items that lack
    them.
    """

    def __init__(self) -> None:
        super().__init__()
        self.config.add(
            {
                "fields": [
                    "artist",
                    "album",
                    "albumartist",
                    "catalognum",
                    "disc",
                    "media",
                    "title",
                    "track",
                    "year",
                ],
                "patterns": {"folder": [], "file": []},
                "ignore_dirs": [],
                "guess": {"folder": True, "file": True},
            }
        )
        # `fields` must exist before the user patterns are compiled:
        # every field named in a user pattern is added to it.
        self.fields = set(self.config["fields"].as_str_seq())
        self.file_patterns = self._user_pattern_to_regex(
            self.config["patterns"]["file"].as_str_seq()
        )
        self.folder_patterns = self._user_pattern_to_regex(
            self.config["patterns"]["folder"].as_str_seq()
        )
        # Fields that are missing on at least one item of the current task
        self.session_fields: set[str] = set()
        self.register_listener("import_task_start", self.filename_task)

    @cached_property
    def ignored_directories(self) -> set[str]:
        """Lower-cased folder names listed under ``ignore_dirs``."""
        return {p.lower() for p in self.config["ignore_dirs"].as_str_seq()}

    def filename_task(self, task: ImportTask, session: ImportSession) -> None:
        """Examines all files in the given import task for any missing
        information it can gather from the file and folder names.

        Once the information has been obtained and checked, it
        is applied to the items to improve later metadata lookup.
        """
        # Retrieve the list of items to process
        items: list[Item] = task.items

        # If there's no missing data to parse
        if not self._check_missing_data(items):
            return

        # Retrieve the path characteristics to check
        parent_folder, item_filenames = self._get_path_strings(items)

        album_matches = self._parse_album_info(parent_folder)
        # Look for useful information in the filenames.
        track_matches = self._build_track_matches(item_filenames)
        # Make sure we got the fields right
        self._sanity_check_matches(album_matches, track_matches)
        # Apply the information
        self._apply_matches(album_matches, track_matches)

    def _check_missing_data(self, items: list[Item]) -> bool:
        """Record which of `self.fields` are missing on at least one item.

        The result is stored in `self.session_fields`. Returns False when
        no field needs to be filled in, so the caller can stop early.
        """
        self.session_fields = {
            field
            for field in self.fields
            if any(self._bad_field(item[field]) for item in items)
        }
        return bool(self.session_fields)

    def _user_pattern_to_regex(
        self, patterns: list[str]
    ) -> list[re.Pattern[str]]:
        """Compile user patterns into a list of usable regex patterns.

        Patterns without any ``$field`` placeholder are dropped, and
        patterns that do not compile (for example because a field is
        used twice) are logged and skipped.
        """
        compiled: list[re.Pattern[str]] = []
        for user_pattern in patterns:
            regexp = self._parse_user_pattern_strings(user_pattern)
            if not regexp:
                continue
            try:
                compiled.append(re.compile(regexp))
            except re.error as exc:
                self._log.warning(
                    "Skipping invalid pattern {!r}: {}",
                    user_pattern,
                    str(exc),
                )
        return compiled

    def _get_path_strings(
        self, items: list[Item]
    ) -> tuple[str, dict[Item, str]]:
        """Return the parent folder name and each item's filename.

        Filenames are returned without their extension. The folder name
        comes from the first item whose parent folder is not listed in
        ``ignore_dirs``; it is an empty string if there is none.
        """
        parent_folder: str = ""
        filenames: dict[Item, str] = {}
        for item in items:
            path: Path = Path(displayable_path(item.path))
            filenames[item] = path.stem
            if not parent_folder:
                # Use `name`, not `stem`: a folder has no extension, and
                # `stem` would cut "R.E.M. - Murmur" down to "R.E.M".
                folder = path.parent.name
                if folder.lower() not in self.ignored_directories:
                    parent_folder = folder
        return parent_folder, filenames

    def _check_user_matches(
        self, text: str, patterns: list[re.Pattern[str]]
    ) -> FilenameMatch:
        """Return the groups of the first pattern matching all of `text`.

        The result is empty (falsy) when no pattern matches.
        """
        for p in patterns:
            if usermatch := p.fullmatch(text):
                return FilenameMatch(usermatch.groupdict())
        return FilenameMatch()

    def _build_track_matches(
        self, item_filenames: dict[Item, str]
    ) -> dict[Item, FilenameMatch]:
        """Parse every filename, preferring user patterns over the
        built-in track pattern.

        Note that `item_filenames` is modified in place when the files
        carry alphanumeric ("A1", "B2") indices.
        """
        # Honour `guess.file` the same way `_parse_album_info` honours
        # `guess.folder`: when disabled, nothing is read from filenames.
        if not self.config["guess"]["file"]:
            return {item: FilenameMatch() for item in item_filenames}

        track_matches: dict[Item, FilenameMatch] = {}
        # Check for alphanumeric indices
        self._parse_alphanumeric_index(item_filenames)
        for item, filename in item_filenames.items():
            if m := self._check_user_matches(filename, self.file_patterns):
                track_matches[item] = m
            else:
                track_matches[item] = self._parse_track_info(filename)
        return track_matches

    @staticmethod
    def _parse_alphanumeric_index(item_filenames: dict[Item, str]) -> None:
        """Replace vinyl-style indices ("A1", "B1", "B2") by track numbers.

        Nothing happens unless every filename starts with such an index.
        Otherwise the files are ordered by index and the index at the
        start of each filename is replaced, in place, by the position in
        that order (starting at 1).

        Discs are not accounted for.
        """
        indexes: list[tuple[str, Item]] = []
        for item, filename in item_filenames.items():
            m = RE_ALPHANUM_INDEX.match(filename)
            if not m:
                # If all the tracks do not start with a vinyl index, abort
                return
            indexes.append((m.group(), item))

        def index_key(pair: tuple[str, Item]) -> tuple[str, int]:
            # Order by side letters, then numerically by position, so
            # that "A2" sorts before "A10".
            label = pair[0]
            letters = label.rstrip("0123456789")
            digits = label[len(letters) :]
            return letters, int(digits) if digits else 0

        indexes.sort(key=index_key)
        for position, (label, item) in enumerate(indexes, start=1):
            # The index is anchored to the start of the filename, so the
            # first occurrence is the one to substitute.
            item_filenames[item] = item_filenames[item].replace(
                label, str(position), 1
            )

    @staticmethod
    def _parse_track_info(text: str) -> FilenameMatch:
        """Extract disc, track, artist and title from a filename."""
        match = RE_TRACK_INFO.match(text)
        if match is None:
            # Every group of the pattern is optional, so this is not
            # expected to happen.
            return FilenameMatch()
        trackmatch = FilenameMatch(match.groupdict())
        # if the phrase "by" is matched, swap artist and title
        if trackmatch["by"]:
            artist = trackmatch["title"]
            trackmatch["title"] = trackmatch["artist"]
            trackmatch["artist"] = artist
            # remove that key
            del trackmatch["by"]
        # if all fields except `track` are none
        # set title to track number as well
        # we can't be sure if it's actually the track number
        # or track title
        track = match.group("track")
        if set(trackmatch.values()) == {track}:
            trackmatch["title"] = track

        return trackmatch

    def _parse_album_info(self, text: str) -> FilenameMatch:
        """Extract album level fields from a folder name.

        Year, catalog number and media type are looked for first and
        removed from the text; what remains is split into album artist
        and album.
        """
        matches = FilenameMatch()

        if not self.config["guess"]["folder"] or (
            config["import"]["group_albums"] or config["import"]["singletons"]
        ):
            # If the group albums flag is thrown, we can't trust the
            # parent directory, likewise for singletons - return an
            # empty match
            return matches
        # Check if a user pattern matches
        if m := self._check_user_matches(text, self.folder_patterns):
            return m
        # Start with the extra fields to make parsing
        # the album artist and artist field easier
        year, span = self._parse_year(text)
        if year:
            # Remove it from the string if found
            text = self._mutate_string(text, span)
            matches["year"] = year

        # Look for the catalog number, it must be in brackets
        # It will not contain the filetype, flac, mp3, wav, etc
        catalognum, span = self._parse_catalognum(text)
        if catalognum:
            text = self._mutate_string(text, span)
            matches["catalognum"] = catalognum
        # Look for a media type
        media, span = self._parse_media(text)
        if media:
            text = self._mutate_string(text, span)
            matches["media"] = media

        # Remove anything left within brackets
        while brackets := RE_BRACKETS.search(text):
            text = self._mutate_string(text, brackets.span())
        # Remaining text used for album, albumartist
        album, albumartist = self._parse_album_and_albumartist(text)
        matches["album"] = album
        matches["albumartist"] = albumartist

        return matches

    def _apply_matches(
        self,
        album_match: FilenameMatch,
        track_matches: dict[Item, FilenameMatch],
    ) -> None:
        """Apply all valid matched fields to all items in the match
        dictionary.

        A guess is only written when the field is one of
        `self.session_fields` and the item's current value is empty.
        """
        for item, track_match in track_matches.items():
            # Build a fresh dict for every item; a shared one would let
            # fields matched for one track leak into the following ones.
            # Track level values take precedence over album level ones.
            match = {**album_match, **track_match}
            # Values are passed as arguments instead of being formatted
            # into the message: the logger is assumed to str.format its
            # message (the previous code logged "{.artist}" with an
            # argument), so braces in a filename must not end up in it.
            self._log.debug("keys: {}", ", ".join(match))
            found_data: dict[str, int | str] = {
                key: new_value
                for key, new_value in match.items()
                if key in self.session_fields
                and new_value
                and self._bad_field(item.get(key))
            }
            if not found_data:
                continue
            self._log.info("guessing {}", self._format_guesses(found_data))
            item.update(found_data)

    @staticmethod
    def _format_guesses(guesses: dict[str, int | str]) -> str:
        """Format guesses in a 'field="guess"' style for logging"""
        return ", ".join(f'{key}="{value}"' for key, value in guesses.items())

    @staticmethod
    def _parse_album_and_albumartist(
        text: str,
    ) -> tuple[str | None, str | None]:
        """Split the remaining folder name into album and album artist.

        The text is split along dashes and underscores. With exactly two
        parts, the first is taken as the album artist and the second as
        the album, unless one of them is "VA", "Various" or "Various
        Artists"; that part is then replaced by the configured
        ``va_name`` and used as the album artist. With any other number
        of parts, the first part is taken as the album.

        Returns the tuple ``(album, albumartist)``.
        """
        remaining = [
            f for field in RE_SPLIT.split(text) if (f := field.strip())
        ]
        if not remaining:
            return None, None
        if len(remaining) != 2:
            return remaining[0], None

        # assume the artist comes before the title in most situations
        possible_albumartist, possible_album = remaining
        # `fullmatch`, because RE_VARIOUS is unanchored: a prefix match
        # would also treat "Valentine" or "Van Halen" as various artists.
        if RE_VARIOUS.fullmatch(possible_album):
            return possible_albumartist, config["va_name"].as_str()
        if RE_VARIOUS.fullmatch(possible_albumartist):
            return possible_album, config["va_name"].as_str()
        return possible_album, possible_albumartist

    @staticmethod
    def _parse_year(text: str) -> tuple[str | None, tuple[int, int]]:
        """Find a four digit year, trying YEAR_REGEX in order.

        A candidate later than the current year is rejected. Returns the
        year and the span of the whole match, or ``(None, (0, 0))``.
        """
        current_year = datetime.now().year
        for exp in YEAR_REGEX:
            match = exp.search(text)
            if match and int(match.group("year")) <= current_year:
                return match.group("year"), match.span()
        return None, (0, 0)

    @staticmethod
    def _parse_media(text: str) -> tuple[str | None, tuple[int, int]]:
        """Look for the media type, we are only interested in a few common
        types - CD, Vinyl, Cassette or WEB. To avoid overreach, in the
        case of titles containing a medium, only searches for media types
        within a pair of brackets.
        """
        mappings = {
            "cd": "CD",
            "vinyl": "Vinyl",
            "web": "Digital Media",
            "cassette": "Cassette",
        }
        match = RE_MEDIA_TYPE.search(text)
        if not match:
            return None, (0, 0)
        # The media groups are alternatives, so at most one is set.
        media = next(
            (mappings[k] for k, v in match.groupdict().items() if v), None
        )
        return media, match.span()

    @staticmethod
    def _parse_catalognum(text: str) -> tuple[str | None, tuple[int, int]]:
        """Find a bracketed catalog number that is not a media type."""
        match = RE_CATALOGNUM.search(text)
        # assert that it cannot be mistaken for a media type
        if match and not RE_MEDIA_TYPE.match(match[0]):
            return match.group("catalognum"), match.span()
        return None, (0, 0)

    def _parse_user_pattern_strings(self, text: str) -> str | None:
        """Convert a user pattern such as ``"$artist - $title"`` into the
        source of a regular expression.

        Everything but the ``$field`` placeholders is matched literally;
        each placeholder becomes a named group whose name is the
        lower-cased field. The fields are added to `self.fields`.
        Returns None if the pattern contains no placeholder.
        """
        fields: list[str] = [
            s.lower() for s in re.findall(r"\$([a-zA-Z\_]+)", text)
        ]
        if not fields:
            # if there are no usable fields
            return None
        self.fields.update(fields)
        # Escape any special characters, then replace all placeholders in
        # a single pass. Substituting field by field would let "$album"
        # clobber the start of "$albumartist".
        return re.sub(
            r"\\\$([a-zA-Z_]+)",
            lambda m: f"(?P<{m.group(1).lower()}>.+)",
            re.escape(text),
        )

    @staticmethod
    def _mutate_string(text: str, span: tuple[int, int]) -> str:
        """Replace a matched field with a separator"""
        start, end = span
        return text[:start] + "-" + text[end:]

    def _sanity_check_matches(
        self,
        album_match: FilenameMatch,
        track_matches: dict[Item, FilenameMatch],
    ) -> None:
        """Check to make sure data is coherent between
        track and album matches. Largely looking to see
        if the artist and album artist fields are properly
        identified.
        """

        def swap_artist_title(tracks: list[FilenameMatch]) -> None:
            # swap the track titles and track artists
            # NOTE(review): assigning None is ignored by FilenameMatch,
            # so a track without an artist keeps its title - confirm
            # that this is intended.
            for track in tracks:
                artist = track["title"]
                track["title"] = track["artist"]
                track["artist"] = artist
            self._log.info("Swapped title and artist fields.")

        # None of this logic applies if there's only one track
        if len(track_matches) < 2:
            return

        tracks: list[FilenameMatch] = list(track_matches.values())
        album_artist = album_match["albumartist"]
        # Nothing can be inferred for a various artists album
        if album_artist and album_artist == config["va_name"].as_str():
            return

        one_artist = self._equal_fields(tracks, "artist")
        one_title = self._equal_fields(tracks, "title")

        if one_artist and not one_title:
            # All the artist fields match, and the title fields don't
            # It's probably the artist
            return
        if one_title and not one_artist and not album_artist:
            # If the track titles match, and there's no album
            # artist to check on
            swap_artist_title(tracks)
        elif album_artist:
            # If the album artist shows up as the title of any track,
            # the title field is likely the artist field.
            # Sometimes an album has a presenter credited
            track_titles = {str(t["title"]).upper() for t in tracks}
            if album_artist.upper() in track_titles:
                swap_artist_title(tracks)

    @staticmethod
    def _equal_fields(dictionaries: list[FilenameMatch], field: str) -> bool:
        """Checks if all values of a field on a dictionary match."""
        return len({d[field] for d in dictionaries}) <= 1

    @staticmethod
    def _bad_field(field: str | int | None) -> bool:
        """Determine whether a given field is "bad" (empty or otherwise
        meaningless) and in need of replacement.

        None, integers less than or equal to zero, and strings that are
        empty or only whitespace are bad.
        """
        if field is None:
            return True
        if isinstance(field, int):
            return field <= 0
        return RE_BAD_FIELD.match(field) is not None
code-block:: yaml + + fromfilename: + fields: + - artist + - album + - albumartist + - catalognum + - disc + - media + - title + - track + - year + patterns: + file: [] + folder: [] + ignore_dirs: [] + +.. conf:: fields + :default: [ artist, album, albumartist, catalognum, disc, media, title, track, year ] + + The fields the plugin will guess with its default pattern matching. + + By default, the plugin is configured to match all fields its default + patterns are capable of matching. + + If a field is specified in a user pattern, that field does not need + to be present on this list to be applied. + + If you only want the plugin to contribute the track title and artist, + you would put ``[title, artist]``. + +.. conf:: patterns + + Users can specify patterns to expand the set of filenames that can + be recognized by the plugin. Patterns can be specified as ``file`` + or ``folder`` patterns. ``file`` patterns are checked against the filename. + ``folder`` patterns are checked against the parent folder of the file. + + If ``fromfilename`` can't match the entire string to one of the given patterns, it will + fall back to the default pattern. + + For example, the following custom patterns will match this path and folder, + and retrieve the specified fields. + + ``/music/James Lawson - 841689 (2004)/Coming Up - James Lawson & Andy Farley.mp3`` + + .. code-block:: yaml + + patterns: + folder: + - "$albumartist - $discogs_albumid ($year)" + file: + - "$title - $artist" + +.. conf:: ignore_dirs + :default: [] + + Specify parent directory names that will not be searched for album + information. Useful if you use a regular directory for importing + single files. + +.. conf:: guess + + Disable guessing from the folder or filename. Be aware that disabling both + will cause the plugin to have no effect! + + .. 
code-block:: yaml + + guess: + folder: yes + file: yes diff --git a/test/plugins/test_fromfilename.py b/test/plugins/test_fromfilename.py index f13e88aea2..cf444993d7 100644 --- a/test/plugins/test_fromfilename.py +++ b/test/plugins/test_fromfilename.py @@ -13,87 +13,808 @@ """Tests for the fromfilename plugin.""" +from copy import deepcopy +from unittest.mock import patch + import pytest -from beetsplug import fromfilename +from beets.importer.tasks import ImportTask, SingletonImportTask +from beets.library import Item +from beets.test.helper import PluginMixin +from beetsplug.fromfilename import FilenameMatch, FromFilenamePlugin class Session: + """Mock session, not used by the plugin.""" + pass -class Item: - def __init__(self, path): - self.path = path - self.track = 0 - self.artist = "" - self.title = "" +def mock_item(**kwargs): + """Mock item with blank defaults.""" + defaults = dict( + title="", + artist="", + albumartist="", + album="", + disc=0, + track=0, + catalognum="", + media="", + mtime=12345, + ) + return Item(**{**defaults, **kwargs}) -class Task: - def __init__(self, items): - self.items = items - self.is_album = True +def mock_task(items): + """Mock task for ease of testing.""" + return ImportTask(toppath=None, paths=None, items=items) @pytest.mark.parametrize( - "song1, song2", + "text,matchgroup", [ + ("3", FilenameMatch({"track": "3", "title": "3"})), + ("04", FilenameMatch({"track": "04", "title": "04"})), + ("6.", FilenameMatch({"track": "6", "title": "6"})), + ("3.5", FilenameMatch({"disc": "3", "track": "5"})), + ("1-02", FilenameMatch({"disc": "1", "track": "02"})), + ("100-4", FilenameMatch({"disc": "100", "track": "4"})), ( - ( - "/tmp/01 - The Artist - Song One.m4a", - 1, - "The Artist", - "Song One", + "04.Title", + FilenameMatch({"track": "04", "title": "Title"}), + ), + ( + "5_-_Title", + FilenameMatch({"track": "5", "title": "Title"}), + ), + ( + "1-02 Title", + FilenameMatch({"disc": "1", "track": "02", "title": "Title"}), + ), + 
( + "3.5 - Title", + FilenameMatch({"disc": "3", "track": "5", "title": "Title"}), + ), + ( + "5_-_Artist_-_Title", + FilenameMatch({"track": "5", "artist": "Artist", "title": "Title"}), + ), + ( + "3-8- Artist-Title", + FilenameMatch( + { + "disc": "3", + "track": "8", + "artist": "Artist", + "title": "Title", + } ), - ( - "/tmp/02. - The Artist - Song Two.m4a", - 2, - "The Artist", - "Song Two", + ), + ( + "4-3 - Artist Name - Title", + FilenameMatch( + { + "disc": "4", + "track": "3", + "artist": "Artist Name", + "title": "Title", + } + ), + ), + ( + "4-3_-_Artist_Name_-_Title", + FilenameMatch( + { + "disc": "4", + "track": "3", + "artist": "Artist_Name", + "title": "Title", + } + ), + ), + ( + "6 Title by Artist", + FilenameMatch({"track": "6", "artist": "Artist", "title": "Title"}), + ), + ( + "Title", + FilenameMatch({"title": "Title"}), + ), + ], +) +def test_parse_track_info(text, matchgroup): + """Test parsing track information from a filename.""" + f = FromFilenamePlugin() + m = f._parse_track_info(text) + assert dict(matchgroup.items()) == dict(m.items()) + + +@pytest.mark.parametrize( + "text,matchgroup", + [ + ( + # highly unlikely + "", + FilenameMatch(), + ), + ( + "1970", + FilenameMatch( + { + "year": "1970", + } + ), + ), + ( + "Album Title", + FilenameMatch( + { + "album": "Album Title", + } + ), + ), + ( + "Artist - Album Title", + FilenameMatch( + { + "albumartist": "Artist", + "album": "Album Title", + } + ), + ), + ( + "Artist - Album Title (2024)", + FilenameMatch( + { + "albumartist": "Artist", + "album": "Album Title", + "year": "2024", + } + ), + ), + ( + "Artist - 2024 - Album Title [flac]", + FilenameMatch( + { + "albumartist": "Artist", + "album": "Album Title", + "year": "2024", + } + ), + ), + ( + "(2024) Album Title [CATALOGNUM] WEB", + # sometimes things are just going to be unparsable + FilenameMatch( + { + "albumartist": "Album Title", + "album": "WEB", + "year": "2024", + "catalognum": "CATALOGNUM", + } + ), + ), + ( + "{2024} 
Album Artist - Album Title [INFO-WAV]", + FilenameMatch( + { + "albumartist": "Album Artist", + "album": "Album Title", + "year": "2024", + } + ), + ), + ( + "VA - Album Title [2025] [CD-FLAC]", + FilenameMatch( + { + "albumartist": "Various Artists", + "album": "Album Title", + "year": "2025", + "media": "CD", + } + ), + ), + ( + "Artist - Album Title 3000 (1998) [FLAC] {CATALOGNUM}", + FilenameMatch( + { + "albumartist": "Artist", + "album": "Album Title 3000", + "year": "1998", + "catalognum": "CATALOGNUM", + } ), ), ( - ("/tmp/01-The_Artist-Song_One.m4a", 1, "The_Artist", "Song_One"), - ("/tmp/02.-The_Artist-Song_Two.m4a", 2, "The_Artist", "Song_Two"), + "various - cd album (2023) [catalognum 123] {vinyl mp3}", + FilenameMatch( + { + "albumartist": "Various Artists", + "album": "cd album", + "year": "2023", + "catalognum": "catalognum 123", + "media": "Vinyl", + } + ), ), ( - ("/tmp/01 - Song_One.m4a", 1, "", "Song_One"), - ("/tmp/02. - Song_Two.m4a", 2, "", "Song_Two"), + "[CATALOG567] Album - Various (2020) [WEB-FLAC]", + FilenameMatch( + { + "albumartist": "Various Artists", + "album": "Album", + "year": "2020", + "catalognum": "CATALOG567", + "media": "Digital Media", + } + ), ), ( - ("/tmp/Song One by The Artist.m4a", 0, "The Artist", "Song One"), - ("/tmp/Song Two by The Artist.m4a", 0, "The Artist", "Song Two"), + "Album 3000 {web}", + FilenameMatch( + { + "album": "Album 3000", + "media": "Digital Media", + } + ), ), - (("/tmp/01.m4a", 1, "", "01"), ("/tmp/02.m4a", 2, "", "02")), + ], +) +def test_parse_album_info(text, matchgroup): + """Test parsing album information from a folder name.""" + f = FromFilenamePlugin() + m = f._parse_album_info(text) + assert matchgroup == m + + +@pytest.mark.parametrize( + "string,pattern", + [ ( - ("/tmp/Song One.m4a", 0, "", "Song One"), - ("/tmp/Song Two.m4a", 0, "", "Song Two"), + "$albumartist - $album ($year) {$comments}", + ( + r"(?P<albumartist>.+)\ \-\ (?P<album>.+)\ " + r"\((?P<year>.+)\)\ \ 
\{(?P<comments>.+)\}" + ), ), + ("$", None), ], ) -def test_fromfilename(song1, song2): - """ - Each "song" is a tuple of path, expected track number, expected artist, - expected title. - - We use two songs for each test for two reasons: - - The plugin needs more than one item to look for uniform strings in paths - in order to guess if the string describes an artist or a title. - - Sometimes we allow for an optional "." after the track number in paths. - """ - - session = Session() - item1 = Item(song1[0]) - item2 = Item(song2[0]) - task = Task([item1, item2]) - - f = fromfilename.FromFilenamePlugin() - f.filename_task(task, session) - - assert task.items[0].track == song1[1] - assert task.items[0].artist == song1[2] - assert task.items[0].title == song1[3] - assert task.items[1].track == song2[1] - assert task.items[1].artist == song2[2] - assert task.items[1].title == song2[3] +def test_parse_user_pattern_strings(string, pattern): + """Test converting a user's format string to regexp""" + f = FromFilenamePlugin() + assert f._parse_user_pattern_strings(string) == pattern + + +class TestFromFilename(PluginMixin): + plugin = "fromfilename" + preload_plugin = False + + @pytest.mark.parametrize( + "expected_item", + [ + mock_item( + path="/tmp/01 - The Artist - Song One.m4a", + artist="The Artist", + track=1, + title="Song One", + ), + mock_item( + path="/tmp/01 The Artist - Song One.m4a", + artist="The Artist", + track=1, + title="Song One", + ), + mock_item( + path="/tmp/02 The Artist - Song Two.m4a", + artist="The Artist", + track=2, + title="Song Two", + ), + mock_item( + path="/tmp/01-The_Artist-Song_One.m4a", + artist="The_Artist", + track=1, + title="Song_One", + ), + mock_item( + path="/tmp/02.-The_Artist-Song_Two.m4a", + artist="The_Artist", + track=2, + title="Song_Two", + ), + mock_item( + path="/tmp/01 - Song_One.m4a", + track=1, + title="Song_One", + ), + mock_item( + path="/tmp/02. 
- Song_Two.m4a", + track=2, + title="Song_Two", + ), + mock_item( + path="/tmp/Song One by The Artist.m4a", + artist="The Artist", + title="Song One", + ), + mock_item( + path="/tmp/Song Two by The Artist.m4a", + artist="The Artist", + title="Song Two", + ), + mock_item( + path="/tmp/01.m4a", + track=1, + title="01", + ), + mock_item( + path="/tmp/02.m4a", + track=2, + title="02", + ), + mock_item( + path="/tmp/Song One.m4a", + title="Song One", + ), + mock_item( + path="/tmp/Song Two.m4a", + title="Song Two", + ), + mock_item( + path=( + "/tmp/" + "[CATALOG567] Album - Various - [WEB-FLAC]" + "/2-10 - Artist - Song One.m4a" + ), + album="Album", + artist="Artist", + track=10, + disc=2, + albumartist="Various Artists", + catalognum="CATALOG567", + title="Song One", + media="Digital Media", + ), + mock_item( + path=( + "/tmp/" + "[CATALOG567] Album - Various - [WEB-FLAC]" + "/03-04 - Other Artist - Song Two.m4a" + ), + album="Album", + artist="Other Artist", + disc=3, + track=4, + albumartist="Various Artists", + catalognum="CATALOG567", + title="Song Two", + media="Digital Media", + ), + ], + ) + def test_fromfilename(self, expected_item): + """ + Take expected items, create a task with just the paths. + + After parsing, compare to the original with the expected attributes defined. 
+ """ + task = mock_task(items=[mock_item(path=expected_item.path)]) + f = FromFilenamePlugin() + f.filename_task(task, Session()) + res = task.items[0] + exp = expected_item + assert res.path == exp.path + assert res.artist == exp.artist + assert res.albumartist == exp.albumartist + assert res.disc == exp.disc + assert res.catalognum == exp.catalognum + assert res.year == exp.year + assert res.title == exp.title + + @pytest.mark.parametrize( + "expected_items", + [ + [ + mock_item( + path="/Artist - Album/01 - Track1 - Performer.flac", + track=1, + title="Track1", + album="Album", + albumartist="Artist", + artist="Performer", + ), + mock_item( + path="/Artist - Album/02 - Track2 - Artist.flac", + track=2, + title="Track2", + album="Album", + albumartist="Artist", + artist="Artist", + ), + ], + [ + mock_item( + path=( + "/DiY - 8 Definitions of Bounce/" + "01 - Essa - Definition of Bounce.flac" + ), + track=1, + title="Definition of Bounce", + albumartist="DiY", + album="8 Definitions of Bounce", + artist="Essa", + ), + mock_item( + path=( + "/DiY - 8 Definitions of Bounce/" + "02 - Digs - Definition of Bounce.flac" + ), + track=2, + title="Definition of Bounce", + album="8 Definitions of Bounce", + albumartist="DiY", + artist="Digs", + ), + ], + [ + mock_item( + path=("/Essa - Magneto Essa/1 - Essa - Magneto Essa.flac"), + track=1, + title="Magneto Essa", + album="Magneto Essa", + albumartist="Essa", + artist="Essa", + ), + mock_item( + path=("/Essa - Magneto Essa/2 - Essa - The Immortals.flac"), + track=2, + title="The Immortals", + album="Magneto Essa", + albumartist="Essa", + artist="Essa", + ), + ], + [ + mock_item( + path=("/Magneto Essa/1 - Magneto Essa - Essa.flac"), + track=1, + title="Magneto Essa", + album="Magneto Essa", + artist="Essa", + ), + mock_item( + path=("/Magneto Essa/2 - The Immortals - Essa.flac"), + track=2, + title="The Immortals", + album="Magneto Essa", + artist="Essa", + ), + ], + [ + # Even though it might be clear to human eyes, + # 
we can't guess since the various flag is thrown + mock_item( + path=( + "/Various - 303 Alliance 012/" + "1 - The End of Satellite - Benji303.flac" + ), + track=1, + title="Benji303", + album="303 Alliance 012", + artist="The End of Satellite", + albumartist="Various Artists", + ), + mock_item( + path=( + "/Various - 303 Alliance 012/" + "2 - Ruff Beats - Benji303.flac" + ), + track=2, + title="Benji303", + album="303 Alliance 012", + artist="Ruff Beats", + albumartist="Various Artists", + ), + ], + [ + mock_item( + path=( + "/303 Alliance 012/" + "1 - The End of Satellite - Benji303.flac" + ), + track=1, + title="Benji303", + album="303 Alliance 012", + artist="The End of Satellite", + ), + mock_item( + path=( + "/303 Alliance 012/" + "2 - Ruff Beats - Benji303 & Sam J.flac" + ), + track=2, + title="Benji303 & Sam J", + album="303 Alliance 012", + artist="Ruff Beats", + ), + ], + ], + ) + def test_sanity_check(self, expected_items): + """Take a list of expected items, create a task with just the paths. + Assert the conditions that cause sanity check to change artist and title + fields. 
+ """ + task = mock_task([mock_item(path=item.path) for item in expected_items]) + f = FromFilenamePlugin() + f.filename_task(task, Session()) + res = task.items + exp = expected_items + assert res[0].path == exp[0].path + assert res[0].artist == exp[0].artist + assert res[0].albumartist == exp[0].albumartist + assert res[0].disc == exp[0].disc + assert res[0].catalognum == exp[0].catalognum + assert res[0].year == exp[0].year + assert res[0].title == exp[0].title + assert res[1].path == exp[1].path + assert res[1].artist == exp[1].artist + assert res[1].albumartist == exp[1].albumartist + assert res[1].disc == exp[1].disc + assert res[1].catalognum == exp[1].catalognum + assert res[1].year == exp[1].year + assert res[1].title == exp[1].title + + def test_singleton_import(self): + """Ensure that singletons behave correctly.""" + task = SingletonImportTask( + toppath=None, item=mock_item(path="/01 Track.wav") + ) + f = FromFilenamePlugin() + f.filename_task(task, Session()) + assert task.item.track == 1 + assert task.item.title == "Track" + + def test_item_with_existing_data(self): + """Ensure that existing metadata is not overwritten, no matter + how incorrect it may be.""" + path = "/Album Artist - Album (1999)/01 - Track Title.wav" + albumartist = "Other Artist" + title = "Existing Title" + given = mock_item( + path=path, + albumartist=albumartist, + album=" ", + title=title, + year=2024, + ) + f = FromFilenamePlugin() + f.filename_task(mock_task([given]), Session()) + assert given.title == title + assert given.albumartist == albumartist + assert given.album == "Album" + assert given.year == 2024 + + @pytest.mark.parametrize( + "fields,expected", + [ + ( + [ + "albumartist", + "album", + "year", + "media", + "catalognum", + "artist", + "track", + "disc", + "title", + ], + mock_item( + albumartist="Album Artist", + album="Album", + year="2025", + media="CD", + catalognum="CATALOGNUM", + disc=1, + track=2, + artist="Artist", + title="Track", + ), + ), + ( + 
["album", "year", "media", "track", "disc", "title"], + mock_item( + album="Album", + year="2025", + media="CD", + disc=1, + title="Track", + ), + ), + ], + ) + def test_fields(self, fields, expected): + """Test that the applied fields can be adjusted by the user.""" + path = ( + "/Album Artist - Album (2025) [FLAC CD] {CATALOGNUM}/" + "1-2 Artist - Track.wav" + ) + task = mock_task([mock_item(path=path)]) + expected.path = path + with self.configure_plugin({"fields": fields}): + f = FromFilenamePlugin() + f.filename_task(task, Session()) + res = task.items[0] + assert res.path == expected.path + assert res.artist == expected.artist + assert res.albumartist == expected.albumartist + assert res.disc == expected.disc + assert res.catalognum == expected.catalognum + assert res.year == expected.year + assert res.title == expected.title + + @pytest.mark.parametrize( + "patterns,expected", + [ + ( + { + "folder": ["($comments) - {$albumartist} - {$album}"], + "file": ["$artist - $track - $title"], + }, + mock_item( + path=( + "/(Comment) - {Album Artist} - {Album}" + "/Artist - 02 - Title.flac" + ), + comments="Comment", + albumartist="Album Artist", + album="Album", + artist="Artist", + track=2, + title="Title", + ), + ), + ( + { + "folder": ["[$comments] - {$albumartist} - {$album}"], + "file": ["$artist - $track - $title"], + }, + mock_item( + path=( + "/(Comment) - {Album Artist} - {Album}" + "/Artist - 02 - Title.flac" + ), + artist="Artist", + track=2, + title="Title", + catalognum="Comment", + ), + ), + ], + ) + def test_user_patterns(self, patterns, expected): + """Test recognizing data from a given user pattern.""" + task = mock_task([mock_item(path=expected.path)]) + with self.configure_plugin({"patterns": patterns}): + f = FromFilenamePlugin() + f.filename_task(task, Session()) + res = task.items[0] + assert res.comments == expected.comments + assert res.path == expected.path + assert res.artist == expected.artist + assert res.albumartist == 
expected.albumartist + assert res.disc == expected.disc + assert res.catalognum == expected.catalognum + assert res.year == expected.year + assert res.title == expected.title + + @pytest.mark.parametrize( + "expected", + [ + ( + mock_item(path="/temp/A - track.wav", track=1), + mock_item(path="/temp/B - track.wav", track=2), + mock_item(path="/temp/C - track.wav", track=3), + ), + # Test with numbers + ( + mock_item(path="/temp/A1 - track.wav", track=1), + mock_item(path="/temp/A2 - track.wav", track=2), + mock_item(path="/temp/B1 - track.wav", track=3), + ), + # Test out of order + ( + mock_item(path="/temp/Z - track.wav", track=3), + mock_item(path="/temp/X - track.wav", track=1), + mock_item(path="/temp/Y - track.wav", track=2), + ), + ], + ) + def test_alphanumeric_index(self, expected): + """Assert that an alphanumeric index is guessed in order.""" + task = mock_task([mock_item(path=item.path) for item in expected]) + f = FromFilenamePlugin() + f.filename_task(task, Session()) + assert task.items[0].track == expected[0].track + assert task.items[1].track == expected[1].track + assert task.items[2].track == expected[2].track + + def test_no_guesses(self): + """Assert that an item with complete information is + has no guesses attempted.""" + item = mock_item( + path="/Folder/File.wav", + albumartist="AlbumArtist", + artist="Artist", + title="Title", + ) + fields = ["artist", "title", "albumartist"] + task = mock_task([item]) + with self.configure_plugin({"fields": fields}): + with patch.object(FromFilenamePlugin, "_get_path_strings") as mock: + f = FromFilenamePlugin() + f.filename_task(task, Session()) + mock.assert_not_called() + + def test_only_one_guess(self): + """Assert that an item missing only one value + will just have that key in session fields.""" + item = mock_item( + path="/Folder/File.wav", + albumartist="AlbumArtist", + artist="Artist", + title="Title", + ) + item2 = deepcopy(item) + item2.title = "" + fields = ["artist", "title", "albumartist"] + 
task = mock_task([item, item2]) + with self.configure_plugin({"fields": fields}): + with patch.object( + FromFilenamePlugin, + "_get_path_strings", + return_value=("mock", {item: "mock"}), + ) as mock: + f = FromFilenamePlugin() + f.filename_task(task, Session()) + assert len(f.session_fields) == 1 + assert "title" in f.fields + mock.assert_called() + + def test_ignored_directories(self): + """Assert that a given parent directory name is ignored.""" + ignored = "Incoming" + item = mock_item(path="/tmp/" + ignored + "/01 - File.wav") + with self.configure_plugin({"ignore_dirs": [ignored]}): + f = FromFilenamePlugin() + parent_folder, _ = f._get_path_strings([item]) + assert parent_folder == "" + + def test_guess_folder(self): + """Assert that from filename does not + guess from the folder, if guess folder is `no`.""" + return + + def test_guess_file(self): + """Assert that from filename does not guess + from the file, if guess file is `no`.""" + return + + def test_singleton_flag_import(self): + """If the import task is a singleton, assert that + the plugin does not guess from the folder.""" + return + + def test_group_album_flag_import(self): + """If the group albums flag is thrown, assert + that the plugin does not guess from the folder.""" + return + + def test_import_split_by_group(self): + """Asser that an initial run without group by album, and an inaccurate + album guess, results in a run omitting it with the group album flag.""" + return