22import math
33import os
44import struct
5+ import sys
56import wave
67from io import BytesIO
78from pathlib import Path
@@ -653,19 +654,39 @@ def test_path_mapping_accepts_standard_and_legacy_file_uri(tmp_path):
653654
654655
655656@pytest .mark .asyncio
656- async def test_tencent_silk_encoding_uses_pysilk_tencent_format (tmp_path , monkeypatch ):
657+ @pytest .mark .parametrize (
658+ "rate, channels" ,
659+ [
660+ (24000 , 1 ), # supported, no resample
661+ (44100 , 1 ), # unsupported rate, triggers resample
662+ (22050 , 1 ), # unsupported rate, triggers resample
663+ (48000 , 2 ), # stereo at supported rate, triggers downmix
664+ (44100 , 2 ), # stereo + unsupported rate, triggers both
665+ ],
666+ ids = ["24k-mono" , "44.1k-mono" , "22.05k-mono" , "48k-stereo" , "44.1k-stereo" ],
667+ )
668+ async def test_tencent_silk_encoding_uses_pysilk_tencent_format (
669+ rate , channels , tmp_path , monkeypatch
670+ ):
671+ """Real pysilk end-to-end across sample rates that previously failed.
672+
673+ 44100 Hz was the regression trigger: pysilk rejects it with
674+ ENC_INPUT_INVALID_NO_OF_SAMPLES. The fix resamples to 24 kHz mono via
675+ audioop.ratecv before encoding.
676+ """
657677 monkeypatch .setattr (media_utils , "get_astrbot_temp_path" , lambda : str (tmp_path ))
658678 wav_path = tmp_path / "tone.wav"
659679 silk_path = tmp_path / "tone.silk"
660- rate = 24000
661- frames = int (rate * 0.2 )
680+ secs = 0.2
681+ frames = int (rate * secs )
662682 with wave .open (str (wav_path ), "wb" ) as wav :
663- wav .setnchannels (1 )
683+ wav .setnchannels (channels )
664684 wav .setsampwidth (2 )
665685 wav .setframerate (rate )
666686 for i in range (frames ):
667687 sample = int (0.2 * 32767 * math .sin (2 * math .pi * 440 * i / rate ))
668- wav .writeframesraw (struct .pack ("<h" , sample ))
688+ for _ in range (channels ):
689+ wav .writeframesraw (struct .pack ("<h" , sample ))
669690
670691 duration = await wav_to_tencent_silk (str (wav_path ), str (silk_path ))
671692 silk_bytes = silk_path .read_bytes ()
@@ -679,7 +700,82 @@ async def test_tencent_silk_encoding_uses_pysilk_tencent_format(tmp_path, monkey
679700 assert resolved .format == "tencent_silk"
680701 assert resolved .mime_type == "audio/silk"
681702
682- assert duration == pytest .approx (0.2 )
703+ assert duration == pytest .approx (secs , abs = 0.05 )
683704 assert silk_bytes .startswith (b"\x02 #!SILK_V3" )
684705 assert resolved_silk_bytes .startswith (b"\x02 #!SILK_V3" )
685706 assert not resolved_silk_path .exists ()
707+
708+
def _make_wav(path, rate, channels=1, secs=0.2, freq=440):
    """Write a short 16-bit PCM sine-tone WAV at the given rate/channels.

    Args:
        path: Destination file; passed through ``str()`` before opening.
        rate: Sample rate in Hz, stored in the WAV header.
        channels: Channel count; every channel carries the same sample.
        secs: Tone length in seconds; ``int(rate * secs)`` frames are written.
        freq: Tone frequency in Hz.
    """
    nframes = int(rate * secs)
    amplitude = 0.2 * 32767  # 20% of the signed 16-bit full scale
    tone = [
        int(amplitude * math.sin(2 * math.pi * freq * i / rate))
        for i in range(nframes)
    ]
    # Interleave: each frame repeats the same sample once per channel.
    interleaved = [sample for sample in tone for _ in range(channels)]
    # Pack the whole payload once instead of issuing one tiny write per
    # sample per channel.
    payload = struct.pack(f"<{len(interleaved)}h", *interleaved)
    with wave.open(str(path), "wb") as wav:
        wav.setnchannels(channels)
        wav.setsampwidth(2)
        wav.setframerate(rate)
        wav.writeframes(payload)
720+
721+
class _FakePysilk:
    """Stand-in for the ``pysilk`` module that records encode() calls.

    Each call to :meth:`encode` appends a dict to ``calls`` holding the
    ``sample_rate``, ``bit_rate`` and ``tencent`` arguments it received, so
    tests can assert on how the encoder was invoked without running a real
    SILK encoder.
    """

    def __init__(self):
        # One dict per encode() invocation, in call order.
        self.calls = []

    def encode(self, input_io, output_io, sample_rate, bit_rate, tencent=True):
        """Record the call and write a fixed marker to ``output_io``.

        ``input_io`` is accepted for signature compatibility but never read.
        """
        self.calls.append(
            {"sample_rate": sample_rate, "bit_rate": bit_rate, "tencent": tencent}
        )
        # NOTE(review): the space after \x02 may be an extraction artifact;
        # confirm against the header the real encoder emits.
        output_io.write(b"\x02 #!SILK_V3")
731+
732+
@pytest.mark.asyncio
async def test_wav_to_tencent_silk_resamples_unsupported_rate(tmp_path, monkeypatch):
    """A 44100 Hz WAV must reach ``pysilk.encode`` as 24000 Hz.

    The real encoder is replaced by a recording fake, so only the arguments
    handed to ``encode`` and the bytes written to the output file are checked.
    """
    recorder = _FakePysilk()
    # Any ``import pysilk`` performed during the test resolves to the fake.
    monkeypatch.setitem(sys.modules, "pysilk", recorder)

    source_wav = tmp_path / "tts_44100.wav"
    encoded_silk = tmp_path / "out.silk"
    _make_wav(source_wav, 44100)

    await wav_to_tencent_silk(str(source_wav), str(encoded_silk))

    assert len(recorder.calls) == 1
    only_call = recorder.calls[0]
    assert only_call["sample_rate"] == 24000
    assert only_call["tencent"] is True
    # NOTE(review): the space after \x02 may be an extraction artifact; confirm.
    assert encoded_silk.read_bytes().startswith(b"\x02 #!SILK_V3")
749+
750+
@pytest.mark.asyncio
async def test_wav_to_tencent_silk_resamples_stereo(tmp_path, monkeypatch):
    """Stereo 48000 Hz input is encoded once with the rate left at 48000.

    NOTE(review): the intent is that stereo input is downmixed to mono, but
    this test only asserts the sample rate passed to ``encode``; the channel
    count of the encoded audio is not checked here.
    """
    recorder = _FakePysilk()
    # Any ``import pysilk`` performed during the test resolves to the fake.
    monkeypatch.setitem(sys.modules, "pysilk", recorder)

    source_wav = tmp_path / "stereo_48k.wav"
    encoded_silk = tmp_path / "out.silk"
    _make_wav(source_wav, 48000, channels=2)

    await wav_to_tencent_silk(str(source_wav), str(encoded_silk))

    assert len(recorder.calls) == 1
    # 48000 Hz is supported, so only downmix happens -- rate stays unchanged.
    only_call = recorder.calls[0]
    assert only_call["sample_rate"] == 48000
765+
766+
@pytest.mark.asyncio
async def test_wav_to_tencent_silk_skips_resample_for_supported_rate(
    tmp_path, monkeypatch
):
    """A 24000 Hz mono WAV is encoded once with the rate still at 24000."""
    recorder = _FakePysilk()
    # Any ``import pysilk`` performed during the test resolves to the fake.
    monkeypatch.setitem(sys.modules, "pysilk", recorder)

    source_wav = tmp_path / "tone_24k.wav"
    encoded_silk = tmp_path / "out.silk"
    _make_wav(source_wav, 24000)

    await wav_to_tencent_silk(str(source_wav), str(encoded_silk))

    assert len(recorder.calls) == 1
    only_call = recorder.calls[0]
    assert only_call["sample_rate"] == 24000
0 commit comments