@@ -822,14 +822,16 @@ def _pad_audio_token(token):
822822 if callback is not None :
823823 callback (95 , "Decoding audio..." )
824824
825+ # Get codec dtype once for consistency
826+ codec_dtype = getattr (pipeline , 'codec_dtype' , torch .float32 )
827+
825828 # Lazy codec loading: Load HeartCodec only when needed (for 12GB GPU mode)
826829 lazy_codec = getattr (pipeline , '_lazy_codec' , False )
827830 if lazy_codec and pipeline ._codec is None :
828831 print ("[Lazy Loading] Loading HeartCodec for decoding..." , flush = True )
829832 codec_path = getattr (pipeline , '_codec_path' , None )
830833 if codec_path :
831834 # Use the same dtype as specified in the pipeline for consistency
832- codec_dtype = getattr (pipeline , 'codec_dtype' , torch .float32 )
833835 pipeline ._codec = HeartCodec .from_pretrained (
834836 codec_path ,
835837 device_map = pipeline .codec_device ,
@@ -842,7 +844,8 @@ def _pad_audio_token(token):
842844 else :
843845 raise RuntimeError ("Cannot load HeartCodec: codec_path not available" )
844846
845- frames_for_codec = frames .to (pipeline .codec_device )
847+ # Convert frames to codec device and dtype (important for MPS float16)
848+ frames_for_codec = frames .to (device = pipeline .codec_device , dtype = codec_dtype )
846849 wav = pipeline .codec .detokenize (frames_for_codec )
847850
848851 # Cleanup codec if using lazy loading (free VRAM for next generation)
0 commit comments