Skip to content

Commit e3af889

Browse files
committed
Fix artist name splitting with semicolons and Vorbis multi-field handling
1 parent 1de11be commit e3af889

File tree

2 files changed

+204
-13
lines changed

2 files changed

+204
-13
lines changed

music_assistant/helpers/tags.py

Lines changed: 49 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -282,9 +282,25 @@ def album(self) -> str | None:
282282
@property
283283
def artists(self) -> tuple[str, ...]:
284284
"""Return track artists."""
285-
# prefer multi-artist tag (ARTISTS plural)
285+
# Check for "artists" key which can come from two sources:
286+
# 1. ID3: TXXX:ARTISTS - a MusicBrainz/Picard extension using semicolon-delimited values
287+
# in a single field (ID3 doesn't support multiple instances of the same tag).
288+
# See: https://picard-docs.musicbrainz.org/en/appendices/tag_mapping.html
289+
# 2. Vorbis: Multiple ARTIST (singular) fields per the spec, stored as list by mutagen.
290+
# See: https://xiph.org/vorbis/doc/v-comment.html
286291
if tag := self.tags.get("artists"):
287-
artists = split_items(tag)
292+
# Runtime check: mutagen returns list[str] for Vorbis multi-field
293+
if isinstance(tag, list) and len(tag) > 1: # type: ignore[unreachable]
294+
# Multiple ARTISTS fields in Vorbis - non-standard tagging.
295+
# Should use multiple ARTIST (singular) fields instead.
296+
LOGGER.warning( # type: ignore[unreachable]
297+
"Multiple ARTISTS fields found. Use multiple ARTIST fields instead: %s",
298+
tag,
299+
)
300+
artists = clean_tuple(tag)
301+
else:
302+
# Single field (ID3 TXXX:ARTISTS) - split on semicolons
303+
artists = split_items(tag)
288304
# Warn if ARTISTS tag count doesn't match MB Artist ID count
289305
mb_id_count = len(self.musicbrainz_artistids)
290306
if mb_id_count and mb_id_count != len(artists):
@@ -295,15 +311,17 @@ def artists(self) -> tuple[str, ...]:
295311
tag,
296312
)
297313
return artists
298-
# fallback to regular artist string
314+
# Fallback to ARTIST (singular) tag.
315+
# Use MusicBrainz Artist ID count to determine splitting behavior:
316+
# - 1 ID: single artist confirmed, preserve as-is (handles artist names with semicolons)
317+
# - 2+ IDs: split to match expected count
318+
# - 0 IDs: apply standard splitting heuristics
299319
if tag := self.tags.get("artist"):
320+
mb_id_count = len(self.musicbrainz_artistids)
321+
if mb_id_count == 1:
322+
return (tag,)
300323
if TAG_SPLITTER in tag:
301324
return split_items(tag)
302-
# Use MB artist ID count to guide splitting
303-
# - 0 IDs: only split on "feat." etc., not on "&" or ","
304-
# - 1 ID: don't split at all
305-
# - 2+ IDs: split to match the expected count
306-
mb_id_count = len(self.musicbrainz_artistids)
307325
return split_artists(tag, expected_count=mb_id_count if mb_id_count else None)
308326
# fallback to parsing from filename
309327
title = self.filename.rsplit(os.sep, 1)[-1].split(".")[0]
@@ -331,9 +349,25 @@ def writers(self) -> tuple[str, ...]:
331349
@property
332350
def album_artists(self) -> tuple[str, ...]:
333351
"""Return (all) album artists (if any)."""
334-
# prefer multi-artist tag (ALBUMARTISTS plural)
352+
# Check for "albumartists" key which can come from two sources:
353+
# 1. ID3: TXXX:ALBUMARTISTS - a MusicBrainz/Picard extension using semicolon-delimited
354+
# values (TPE2 multi-value support is inconsistent across players).
355+
# See: https://picard-docs.musicbrainz.org/en/appendices/tag_mapping.html
356+
# 2. Vorbis: Multiple ALBUMARTIST (singular) fields, stored as list by mutagen.
357+
# See: https://xiph.org/vorbis/doc/v-comment.html
335358
if tag := self.tags.get("albumartists"):
336-
artists = split_items(tag)
359+
# Runtime check: mutagen returns list[str] for Vorbis multi-field
360+
if isinstance(tag, list) and len(tag) > 1: # type: ignore[unreachable]
361+
# Multiple ALBUMARTISTS fields in Vorbis - non-standard tagging.
362+
LOGGER.warning( # type: ignore[unreachable]
363+
"Multiple ALBUMARTISTS fields found. Use multiple ALBUMARTIST fields "
364+
"instead: %s",
365+
tag,
366+
)
367+
artists = clean_tuple(tag)
368+
else:
369+
# Single field (ID3 TXXX:ALBUMARTISTS) - split on semicolons
370+
artists = split_items(tag)
337371
# Warn if ALBUMARTISTS tag count doesn't match MB Album Artist ID count
338372
mb_id_count = len(self.musicbrainz_albumartistids)
339373
if mb_id_count and mb_id_count != len(artists):
@@ -345,12 +379,14 @@ def album_artists(self) -> tuple[str, ...]:
345379
tag,
346380
)
347381
return artists
348-
# fallback to regular album artist string
382+
# Fallback to ALBUMARTIST (singular) tag.
383+
# Use MusicBrainz Album Artist ID count to determine splitting behavior.
349384
if tag := self.tags.get("albumartist"):
385+
mb_id_count = len(self.musicbrainz_albumartistids)
386+
if mb_id_count == 1:
387+
return (tag,)
350388
if TAG_SPLITTER in tag:
351389
return split_items(tag)
352-
# Use MB album artist ID count to guide splitting
353-
mb_id_count = len(self.musicbrainz_albumartistids)
354390
return split_artists(tag, expected_count=mb_id_count if mb_id_count else None)
355391
return ()
356392

tests/core/test_tags.py

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,3 +419,158 @@ def test_parse_apev2_tags_genre_multi_value() -> None:
419419
result = _parse_apev2_tags(mock_tags)
420420

421421
assert result.get("genre") == ["Rock", "Pop", "Jazz"]
422+
423+
424+
def test_vorbis_multiple_artist_fields_semicolon_in_name() -> None:
425+
"""Test that multiple ARTIST fields in Vorbis with semicolons are handled correctly.
426+
427+
Regression test for the "ave;new" edge case per the Vorbis spec:
428+
- Japanese artist "ave;new" has a semicolon in their name
429+
- Vorbis allows multiple ARTIST (singular) fields for multi-artist tracks
430+
- The semicolon within "ave;new" must NOT cause additional splitting
431+
432+
Correct Vorbis tagging (per https://xiph.org/vorbis/doc/v-comment.html):
433+
ARTIST=ave;new
434+
ARTIST=佐倉紗織
435+
MUSICBRAINZ_ARTISTID=2ade7b3c-a6f1-4d00-b7f7-fc60abf25dba
436+
MUSICBRAINZ_ARTISTID=822c07bd-1f8a-4fef-acdb-8acfe82fbef5
437+
438+
See: https://musicbrainz.org/artist/2ade7b3c-a6f1-4d00-b7f7-fc60abf25dba
439+
"""
440+
# Simulate Vorbis tags with multiple ARTIST fields (correct per Vorbis spec)
441+
mock_tags = _create_mock_vorbis_tags(
442+
{
443+
"TITLE": ["Call My Dears"],
444+
"ALBUM": ["Lovable"],
445+
"ARTIST": ["ave;new", "佐倉紗織"], # Multiple ARTIST fields
446+
"ARTISTSORT": ["ave;new feat.Sakura, Saori"],
447+
"MUSICBRAINZ_ARTISTID": [
448+
"2ade7b3c-a6f1-4d00-b7f7-fc60abf25dba",
449+
"822c07bd-1f8a-4fef-acdb-8acfe82fbef5",
450+
],
451+
}
452+
)
453+
454+
result = _parse_vorbis_tags(mock_tags)
455+
456+
# Multiple ARTIST fields should be stored as "artists" (plural key)
457+
assert result.get("artists") == ["ave;new", "佐倉紗織"]
458+
# MusicBrainz Artist IDs should be preserved
459+
assert result.get("musicbrainzartistid") == [
460+
"2ade7b3c-a6f1-4d00-b7f7-fc60abf25dba",
461+
"822c07bd-1f8a-4fef-acdb-8acfe82fbef5",
462+
]
463+
464+
# Now test that AudioTags.artists property correctly handles the multiple fields
465+
audio_tags = tags.AudioTags(
466+
raw={},
467+
sample_rate=44100,
468+
channels=2,
469+
bits_per_sample=16,
470+
format="flac",
471+
bit_rate=None,
472+
duration=180.0,
473+
tags=result,
474+
has_cover_image=False,
475+
filename="01 - ave;new feat.佐倉紗織 - Call My Dears.flac",
476+
)
477+
478+
# The artists property must return exactly 2 artists
479+
assert audio_tags.artists == ("ave;new", "佐倉紗織")
480+
# The semicolon in "ave;new" must NOT cause it to be split
481+
assert "ave" not in audio_tags.artists
482+
assert "new" not in audio_tags.artists
483+
# MusicBrainz Artist IDs should match the artist count
484+
assert audio_tags.musicbrainz_artistids == (
485+
"2ade7b3c-a6f1-4d00-b7f7-fc60abf25dba",
486+
"822c07bd-1f8a-4fef-acdb-8acfe82fbef5",
487+
)
488+
assert len(audio_tags.artists) == len(audio_tags.musicbrainz_artistids)
489+
490+
491+
def test_id3_artist_tag_semicolon_single_mbid() -> None:
492+
"""Test that single ARTIST tag with semicolon is not split when 1 MB ID exists.
493+
494+
Regression test for formats without multi-value ARTISTS tag support (ID3, etc.):
495+
- Artist name "ave;new" contains a semicolon
496+
- Single MUSICBRAINZ_ARTISTID confirms this is one artist
497+
- The semicolon must NOT cause the name to be split into "ave" and "new"
498+
499+
See: https://musicbrainz.org/artist/2ade7b3c-a6f1-4d00-b7f7-fc60abf25dba
500+
"""
501+
# Simulate ID3 tags: single ARTIST field with semicolon, single MB ID
502+
audio_tags = tags.AudioTags(
503+
raw={},
504+
sample_rate=44100,
505+
channels=2,
506+
bits_per_sample=16,
507+
format="mp3",
508+
bit_rate=None,
509+
duration=180.0,
510+
tags={
511+
"title": "Colorful",
512+
"album": "Lovable",
513+
"artist": "ave;new",
514+
"musicbrainzartistid": "2ade7b3c-a6f1-4d00-b7f7-fc60abf25dba",
515+
},
516+
has_cover_image=False,
517+
filename="01 - ave;new - Colorful.mp3",
518+
)
519+
520+
# Single MB ID = single artist, no splitting
521+
assert audio_tags.artists == ("ave;new",)
522+
assert audio_tags.musicbrainz_artistids == ("2ade7b3c-a6f1-4d00-b7f7-fc60abf25dba",)
523+
# Verify the semicolon did NOT cause incorrect splitting
524+
assert "ave" not in audio_tags.artists
525+
assert "new" not in audio_tags.artists
526+
527+
528+
def test_id3_artist_tag_semicolon_multiple_mbids() -> None:
529+
"""Test that ARTIST tag with semicolon IS split when multiple MB IDs exist.
530+
531+
When multiple MusicBrainz Artist IDs are present, the semicolon should be
532+
treated as a separator between artists.
533+
"""
534+
audio_tags = tags.AudioTags(
535+
raw={},
536+
sample_rate=44100,
537+
channels=2,
538+
bits_per_sample=16,
539+
format="mp3",
540+
bit_rate=None,
541+
duration=180.0,
542+
# musicbrainzartistid can be list[str] from mutagen (dict type is str for ffprobe compat)
543+
tags={
544+
"artist": "Artist A;Artist B",
545+
"musicbrainzartistid": ["id-a", "id-b"], # type: ignore[dict-item]
546+
},
547+
has_cover_image=False,
548+
filename="test.mp3",
549+
)
550+
551+
# Multiple MB IDs = semicolon should split
552+
assert audio_tags.artists == ("Artist A", "Artist B")
553+
assert audio_tags.musicbrainz_artistids == ("id-a", "id-b")
554+
555+
556+
def test_id3_albumartist_tag_semicolon_single_mbid() -> None:
557+
"""Test that ALBUMARTIST tag with semicolon is not split when 1 MB Album Artist ID exists."""
558+
audio_tags = tags.AudioTags(
559+
raw={},
560+
sample_rate=44100,
561+
channels=2,
562+
bits_per_sample=16,
563+
format="mp3",
564+
bit_rate=None,
565+
duration=180.0,
566+
tags={
567+
"albumartist": "ave;new",
568+
"musicbrainzalbumartistid": "2ade7b3c-a6f1-4d00-b7f7-fc60abf25dba",
569+
},
570+
has_cover_image=False,
571+
filename="test.mp3",
572+
)
573+
574+
# Single MB Album Artist ID = single artist, no splitting
575+
assert audio_tags.album_artists == ("ave;new",)
576+
assert audio_tags.musicbrainz_albumartistids == ("2ade7b3c-a6f1-4d00-b7f7-fc60abf25dba",)

0 commit comments

Comments
 (0)