@@ -419,3 +419,158 @@ def test_parse_apev2_tags_genre_multi_value() -> None:
419419 result = _parse_apev2_tags (mock_tags )
420420
421421 assert result .get ("genre" ) == ["Rock" , "Pop" , "Jazz" ]
422+
423+
424+ def test_vorbis_multiple_artist_fields_semicolon_in_name () -> None :
425+ """Test that multiple ARTIST fields in Vorbis with semicolons are handled correctly.
426+
427+ Regression test for the "ave;new" edge case per the Vorbis spec:
428+ - Japanese artist "ave;new" has a semicolon in their name
429+ - Vorbis allows multiple ARTIST (singular) fields for multi-artist tracks
430+ - The semicolon within "ave;new" must NOT cause additional splitting
431+
432+ Correct Vorbis tagging (per https://xiph.org/vorbis/doc/v-comment.html):
433+ ARTIST=ave;new
434+ ARTIST=佐倉紗織
435+ MUSICBRAINZ_ARTISTID=2ade7b3c-a6f1-4d00-b7f7-fc60abf25dba
436+ MUSICBRAINZ_ARTISTID=822c07bd-1f8a-4fef-acdb-8acfe82fbef5
437+
438+ See: https://musicbrainz.org/artist/2ade7b3c-a6f1-4d00-b7f7-fc60abf25dba
439+ """
440+ # Simulate Vorbis tags with multiple ARTIST fields (correct per Vorbis spec)
441+ mock_tags = _create_mock_vorbis_tags (
442+ {
443+ "TITLE" : ["Call My Dears" ],
444+ "ALBUM" : ["Lovable" ],
445+ "ARTIST" : ["ave;new" , "佐倉紗織" ], # Multiple ARTIST fields
446+ "ARTISTSORT" : ["ave;new feat.Sakura, Saori" ],
447+ "MUSICBRAINZ_ARTISTID" : [
448+ "2ade7b3c-a6f1-4d00-b7f7-fc60abf25dba" ,
449+ "822c07bd-1f8a-4fef-acdb-8acfe82fbef5" ,
450+ ],
451+ }
452+ )
453+
454+ result = _parse_vorbis_tags (mock_tags )
455+
456+ # Multiple ARTIST fields should be stored as "artists" (plural key)
457+ assert result .get ("artists" ) == ["ave;new" , "佐倉紗織" ]
458+ # MusicBrainz Artist IDs should be preserved
459+ assert result .get ("musicbrainzartistid" ) == [
460+ "2ade7b3c-a6f1-4d00-b7f7-fc60abf25dba" ,
461+ "822c07bd-1f8a-4fef-acdb-8acfe82fbef5" ,
462+ ]
463+
464+ # Now test that AudioTags.artists property correctly handles the multiple fields
465+ audio_tags = tags .AudioTags (
466+ raw = {},
467+ sample_rate = 44100 ,
468+ channels = 2 ,
469+ bits_per_sample = 16 ,
470+ format = "flac" ,
471+ bit_rate = None ,
472+ duration = 180.0 ,
473+ tags = result ,
474+ has_cover_image = False ,
475+ filename = "01 - ave;new feat.佐倉紗織 - Call My Dears.flac" ,
476+ )
477+
478+ # The artists property must return exactly 2 artists
479+ assert audio_tags .artists == ("ave;new" , "佐倉紗織" )
480+ # The semicolon in "ave;new" must NOT cause it to be split
481+ assert "ave" not in audio_tags .artists
482+ assert "new" not in audio_tags .artists
483+ # MusicBrainz Artist IDs should match the artist count
484+ assert audio_tags .musicbrainz_artistids == (
485+ "2ade7b3c-a6f1-4d00-b7f7-fc60abf25dba" ,
486+ "822c07bd-1f8a-4fef-acdb-8acfe82fbef5" ,
487+ )
488+ assert len (audio_tags .artists ) == len (audio_tags .musicbrainz_artistids )
489+
490+
491+ def test_id3_artist_tag_semicolon_single_mbid () -> None :
492+ """Test that single ARTIST tag with semicolon is not split when 1 MB ID exists.
493+
494+ Regression test for formats without multi-value ARTISTS tag support (ID3, etc.):
495+ - Artist name "ave;new" contains a semicolon
496+ - Single MUSICBRAINZ_ARTISTID confirms this is one artist
497+ - The semicolon must NOT cause the name to be split into "ave" and "new"
498+
499+ See: https://musicbrainz.org/artist/2ade7b3c-a6f1-4d00-b7f7-fc60abf25dba
500+ """
501+ # Simulate ID3 tags: single ARTIST field with semicolon, single MB ID
502+ audio_tags = tags .AudioTags (
503+ raw = {},
504+ sample_rate = 44100 ,
505+ channels = 2 ,
506+ bits_per_sample = 16 ,
507+ format = "mp3" ,
508+ bit_rate = None ,
509+ duration = 180.0 ,
510+ tags = {
511+ "title" : "Colorful" ,
512+ "album" : "Lovable" ,
513+ "artist" : "ave;new" ,
514+ "musicbrainzartistid" : "2ade7b3c-a6f1-4d00-b7f7-fc60abf25dba" ,
515+ },
516+ has_cover_image = False ,
517+ filename = "01 - ave;new - Colorful.mp3" ,
518+ )
519+
520+ # Single MB ID = single artist, no splitting
521+ assert audio_tags .artists == ("ave;new" ,)
522+ assert audio_tags .musicbrainz_artistids == ("2ade7b3c-a6f1-4d00-b7f7-fc60abf25dba" ,)
523+ # Verify the semicolon did NOT cause incorrect splitting
524+ assert "ave" not in audio_tags .artists
525+ assert "new" not in audio_tags .artists
526+
527+
528+ def test_id3_artist_tag_semicolon_multiple_mbids () -> None :
529+ """Test that ARTIST tag with semicolon IS split when multiple MB IDs exist.
530+
531+ When multiple MusicBrainz Artist IDs are present, the semicolon should be
532+ treated as a separator between artists.
533+ """
534+ audio_tags = tags .AudioTags (
535+ raw = {},
536+ sample_rate = 44100 ,
537+ channels = 2 ,
538+ bits_per_sample = 16 ,
539+ format = "mp3" ,
540+ bit_rate = None ,
541+ duration = 180.0 ,
542+ # musicbrainzartistid can be list[str] from mutagen (dict type is str for ffprobe compat)
543+ tags = {
544+ "artist" : "Artist A;Artist B" ,
545+ "musicbrainzartistid" : ["id-a" , "id-b" ], # type: ignore[dict-item]
546+ },
547+ has_cover_image = False ,
548+ filename = "test.mp3" ,
549+ )
550+
551+ # Multiple MB IDs = semicolon should split
552+ assert audio_tags .artists == ("Artist A" , "Artist B" )
553+ assert audio_tags .musicbrainz_artistids == ("id-a" , "id-b" )
554+
555+
556+ def test_id3_albumartist_tag_semicolon_single_mbid () -> None :
557+ """Test that ALBUMARTIST tag with semicolon is not split when 1 MB Album Artist ID exists."""
558+ audio_tags = tags .AudioTags (
559+ raw = {},
560+ sample_rate = 44100 ,
561+ channels = 2 ,
562+ bits_per_sample = 16 ,
563+ format = "mp3" ,
564+ bit_rate = None ,
565+ duration = 180.0 ,
566+ tags = {
567+ "albumartist" : "ave;new" ,
568+ "musicbrainzalbumartistid" : "2ade7b3c-a6f1-4d00-b7f7-fc60abf25dba" ,
569+ },
570+ has_cover_image = False ,
571+ filename = "test.mp3" ,
572+ )
573+
574+ # Single MB Album Artist ID = single artist, no splitting
575+ assert audio_tags .album_artists == ("ave;new" ,)
576+ assert audio_tags .musicbrainz_albumartistids == ("2ade7b3c-a6f1-4d00-b7f7-fc60abf25dba" ,)
0 commit comments