Fix several edge cases for streaming (with crossfade enabled) (#2547)

marcelveldt · web-flow · commit d6669fc4fdf5 · 2025-10-25T17:20:10.000+02:00
diff --git a/music_assistant/constants.py b/music_assistant/constants.py
@@ -630,6 +630,18 @@ def create_output_codec_config_entry(
     }
 )
 
+CONF_ENTRY_SUPPORT_CROSSFADE_DIFFERENT_SAMPLE_RATES = ConfigEntry(
+    key="crossfade_different_sample_rates",
+    type=ConfigEntryType.BOOLEAN,
+    label="Allow crossfade between tracks with different sample rates",
+    description="Enable this option to allow crossfading between tracks that have different "
+    "sample rates (e.g. 44.1kHz to 48kHz). \n\n "
+    "Only enable this option if your player actually support this, otherwise you may "
+    "experience audio glitches during crossfades.",
+    default_value=False,
+    category="advanced",
+)
+
 CONF_ENTRY_WARN_PREVIEW = ConfigEntry(
     key="preview_note",
     type=ConfigEntryType.ALERT,
@@ -929,13 +941,13 @@ def create_sample_rates_config_entry(
     "icy-logo": MASS_LOGO_ONLINE,
 }
 
-DEFAULT_PCM_FORMAT = AudioFormat(
+INTERNAL_PCM_FORMAT = AudioFormat(
     # always prefer float32 as internal pcm format to create headroom
     # for filters such as dsp and volume normalization
     content_type=ContentType.PCM_F32LE,
-    sample_rate=48000,
-    bit_depth=32,
-    channels=2,
+    bit_depth=32,  # related to float32
+    sample_rate=48000,  # static for flow stream, dynamic for anything else
+    channels=2,  # static for flow stream, dynamic for anything else
 )
 
 # extra data / extra attributes keys
diff --git a/music_assistant/controllers/streams.py b/music_assistant/controllers/streams.py
diff --git a/music_assistant/helpers/audio.py b/music_assistant/helpers/audio.py
@@ -52,7 +52,7 @@
 from .audio_buffer import AudioBuffer
 from .datetime import utc
 from .dsp import filter_to_ffmpeg_params
-from .ffmpeg import FFMpeg, get_ffmpeg_stream
+from .ffmpeg import FFMpeg, get_ffmpeg_args, get_ffmpeg_stream
 from .playlists import IsHLSPlaylist, PlaylistItem, fetch_playlist, parse_m3u
 from .process import AsyncProcess, communicate
 from .util import detect_charset
@@ -421,7 +421,7 @@ async def get_stream_details(
     return streamdetails
 
 
-async def get_media_stream_with_buffer(
+async def get_buffered_media_stream(
     mass: MusicAssistant,
     streamdetails: StreamDetails,
     pcm_format: AudioFormat,
@@ -436,8 +436,8 @@ async def get_media_stream_with_buffer(
         seek_position,
     )
 
-    # checksum based on pcm_format and filter_params
-    checksum = f"{pcm_format}-{filter_params}"
+    # checksum based on filter_params
+    checksum = f"{filter_params}"
 
     async def fill_buffer_task() -> None:
         """Background task to fill the audio buffer."""
@@ -528,6 +528,25 @@ async def fill_buffer_task() -> None:
         task = mass.loop.create_task(fill_buffer_task())
         audio_buffer.attach_fill_task(task)
 
+    # special case: pcm format mismatch, resample on the fly
+    # this may happen in some special situations such as crossfading
+    # and it's a bit of a waste to throw away the existing buffer
+    if audio_buffer.pcm_format != pcm_format:
+        LOGGER.info(
+            "buffered_media_stream: pcm format mismatch, resampling on the fly for %s - "
+            "buffer format: %s - requested format: %s",
+            streamdetails.uri,
+            audio_buffer.pcm_format,
+            pcm_format,
+        )
+        async for chunk in get_ffmpeg_stream(
+            audio_input=audio_buffer.iter(seek_position=seek_position),
+            input_format=audio_buffer.pcm_format,
+            output_format=pcm_format,
+        ):
+            yield chunk
+        return
+
     # yield data from the buffer
     chunk_count = 0
     try:
@@ -631,7 +650,7 @@ async def get_media_stream(
                 first_chunk_received = True
                 streamdetails.audio_format.codec_type = ffmpeg_proc.input_format.codec_type
                 logger.debug(
-                    "First chunk received after %s seconds (codec detected: %s)",
+                    "First chunk received after %.2f seconds (codec detected: %s)",
                     mass.loop.time() - stream_start,
                     ffmpeg_proc.input_format.codec_type,
                 )
@@ -1209,23 +1228,19 @@ async def get_silence(
 
 
 async def resample_pcm_audio(
-    input_audio: bytes | AsyncGenerator[bytes, None],
+    input_audio: bytes,
     input_format: AudioFormat,
     output_format: AudioFormat,
-) -> AsyncGenerator[bytes, None]:
+) -> bytes:
     """Resample (a chunk of) PCM audio from input_format to output_format using ffmpeg."""
-    LOGGER.debug(f"Resampling audio from {input_format} to {output_format}")
-
-    async def _yielder() -> AsyncGenerator[bytes, None]:
-        yield input_audio  # type: ignore[misc]
-
-    async for chunk in get_ffmpeg_stream(
-        audio_input=_yielder() if isinstance(input_audio, bytes) else input_audio,
-        input_format=input_format,
-        output_format=output_format,
-        raise_ffmpeg_exception=True,
-    ):
-        yield chunk
+    if input_format == output_format:
+        return input_audio
+    LOGGER.log(VERBOSE_LOG_LEVEL, f"Resampling audio from {input_format} to {output_format}")
+    ffmpeg_args = get_ffmpeg_args(
+        input_format=input_format, output_format=output_format, filter_params=[]
+    )
+    _, stdout, _ = await communicate(ffmpeg_args, input_audio)
+    return stdout
 
 
 def get_chunksize(
diff --git a/music_assistant/providers/airplay/constants.py b/music_assistant/providers/airplay/constants.py
@@ -7,7 +7,7 @@
 from music_assistant_models.enums import ContentType
 from music_assistant_models.media_items import AudioFormat
 
-from music_assistant.constants import DEFAULT_PCM_FORMAT
+from music_assistant.constants import INTERNAL_PCM_FORMAT
 
 DOMAIN = "airplay"
 
@@ -27,9 +27,9 @@
 FALLBACK_VOLUME: Final[int] = 20
 
 AIRPLAY_FLOW_PCM_FORMAT = AudioFormat(
-    content_type=DEFAULT_PCM_FORMAT.content_type,
+    content_type=INTERNAL_PCM_FORMAT.content_type,
     sample_rate=44100,
-    bit_depth=DEFAULT_PCM_FORMAT.bit_depth,
+    bit_depth=INTERNAL_PCM_FORMAT.bit_depth,
 )
 AIRPLAY_PCM_FORMAT = AudioFormat(
     content_type=ContentType.from_bit_depth(16), sample_rate=44100, bit_depth=16
diff --git a/music_assistant/providers/builtin_player/player.py b/music_assistant/providers/builtin_player/player.py
@@ -27,8 +27,8 @@
     CONF_MUTE_CONTROL,
     CONF_POWER_CONTROL,
     CONF_VOLUME_CONTROL,
-    DEFAULT_PCM_FORMAT,
     DEFAULT_STREAM_HEADERS,
+    INTERNAL_PCM_FORMAT,
     create_sample_rates_config_entry,
 )
 from music_assistant.helpers.audio import get_player_filter_params
@@ -274,9 +274,9 @@ async def _serve_audio_stream(self, request: web.Request) -> web.StreamResponse:
 
         pcm_format = AudioFormat(
             sample_rate=stream_format.sample_rate,
-            content_type=DEFAULT_PCM_FORMAT.content_type,
-            bit_depth=DEFAULT_PCM_FORMAT.bit_depth,
-            channels=DEFAULT_PCM_FORMAT.channels,
+            content_type=INTERNAL_PCM_FORMAT.content_type,
+            bit_depth=INTERNAL_PCM_FORMAT.bit_depth,
+            channels=INTERNAL_PCM_FORMAT.channels,
         )
         async for chunk in get_ffmpeg_stream(
             audio_input=self.mass.streams.get_queue_flow_stream(
diff --git a/music_assistant/providers/snapcast/player.py b/music_assistant/providers/snapcast/player.py
@@ -19,7 +19,7 @@
     ATTR_ANNOUNCEMENT_IN_PROGRESS,
     CONF_ENTRY_FLOW_MODE_ENFORCED,
     CONF_ENTRY_OUTPUT_CODEC_HIDDEN,
-    DEFAULT_PCM_FORMAT,
+    INTERNAL_PCM_FORMAT,
 )
 from music_assistant.helpers.audio import get_player_filter_params
 from music_assistant.helpers.compare import create_safe_string
@@ -216,7 +216,7 @@ async def play_media(self, media: PlayerMedia) -> None:
             audio_source = self.mass.streams.get_queue_flow_stream(
                 queue=queue,
                 start_queue_item=start_queue_item,
-                pcm_format=DEFAULT_PCM_FORMAT,
+                pcm_format=INTERNAL_PCM_FORMAT,
             )
         else:
             # assume url or some other direct path
diff --git a/music_assistant/providers/squeezelite/player.py b/music_assistant/providers/squeezelite/player.py
@@ -33,8 +33,9 @@
     CONF_ENTRY_DEPRECATED_EQ_TREBLE,
     CONF_ENTRY_HTTP_PROFILE_FORCED_2,
     CONF_ENTRY_OUTPUT_CODEC,
+    CONF_ENTRY_SUPPORT_CROSSFADE_DIFFERENT_SAMPLE_RATES,
     CONF_ENTRY_SYNC_ADJUST,
-    DEFAULT_PCM_FORMAT,
+    INTERNAL_PCM_FORMAT,
     VERBOSE_LOG_LEVEL,
     create_sample_rates_config_entry,
 )
@@ -92,6 +93,7 @@ def __init__(
             PlayerFeature.VOLUME_MUTE,
             PlayerFeature.ENQUEUE,
             PlayerFeature.GAPLESS_PLAYBACK,
+            PlayerFeature.GAPLESS_DIFFERENT_SAMPLERATE,
         }
         self._attr_can_group_with = {provider.lookup_key}
         self.multi_client_stream: MultiClientStream | None = None
@@ -162,6 +164,7 @@ async def get_config_entries(self) -> list[ConfigEntry]:
             create_sample_rates_config_entry(
                 max_sample_rate=max_sample_rate, max_bit_depth=24, safe_max_bit_depth=24
             ),
+            CONF_ENTRY_SUPPORT_CROSSFADE_DIFFERENT_SAMPLE_RATES,
         ]
 
     async def power(self, powered: bool) -> None:
@@ -229,9 +232,9 @@ async def play_media(self, media: PlayerMedia) -> None:
 
         # this is a syncgroup, we need to handle this with a multi client stream
         master_audio_format = AudioFormat(
-            content_type=DEFAULT_PCM_FORMAT.content_type,
-            sample_rate=DEFAULT_PCM_FORMAT.sample_rate,
-            bit_depth=DEFAULT_PCM_FORMAT.bit_depth,
+            content_type=INTERNAL_PCM_FORMAT.content_type,
+            sample_rate=INTERNAL_PCM_FORMAT.sample_rate,
+            bit_depth=INTERNAL_PCM_FORMAT.bit_depth,
         )
         if media.media_type == MediaType.ANNOUNCEMENT:
             # special case: stream announcement
diff --git a/music_assistant/providers/universal_group/constants.py b/music_assistant/providers/universal_group/constants.py
@@ -8,7 +8,7 @@
 from music_assistant_models.enums import ConfigEntryType
 from music_assistant_models.media_items import AudioFormat
 
-from music_assistant.constants import DEFAULT_PCM_FORMAT, create_sample_rates_config_entry
+from music_assistant.constants import INTERNAL_PCM_FORMAT, create_sample_rates_config_entry
 
 UGP_PREFIX: Final[str] = "ugp_"
 
@@ -29,7 +29,7 @@
 
 
 UGP_FORMAT = AudioFormat(
-    content_type=DEFAULT_PCM_FORMAT.content_type,
-    sample_rate=DEFAULT_PCM_FORMAT.sample_rate,
-    bit_depth=DEFAULT_PCM_FORMAT.bit_depth,
+    content_type=INTERNAL_PCM_FORMAT.content_type,
+    sample_rate=INTERNAL_PCM_FORMAT.sample_rate,
+    bit_depth=INTERNAL_PCM_FORMAT.bit_depth,
 )