Update number of mel bins for whisper model (microsoft#25675)

ajindal1 · web-flow · commit 33debbeea1aa · 2025-08-06T21:33:52.000-07:00
### Description
<!-- Describe your changes. -->
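Read the number of mel bins for the Whisper audio processor from the model
config (`config.num_mel_bins`) instead of hard-coding it, because the value
differs between Whisper model versions. Whisper v3 models use 128 mel bins,
so they could not be run with the previously fixed value of 80.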


### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->
Whisper v3 models currently cannot be run: their `num_mel_bins` is 128, but
the generated audio processor config fixes `n_mel` at 80, which causes an
issue during preprocessing.
diff --git a/onnxruntime/python/tools/transformers/models/whisper/whisper_helper.py b/onnxruntime/python/tools/transformers/models/whisper/whisper_helper.py
@@ -6,7 +6,6 @@
 import json
 import logging
 import os
-import textwrap
 from pathlib import Path
 
 import numpy as np
@@ -93,16 +92,10 @@ def save_processing(
         if separate_encoder_and_decoder_init:
             return
 
-        audio_processor_json = textwrap.dedent("""\
-        {
+        audio_processor_cfg = {
             "feature_extraction": {
                 "sequence": [
-                    {
-                        "operation": {
-                            "name": "audio_decoder",
-                            "type": "AudioDecoder"
-                        }
-                    },
+                    {"operation": {"name": "audio_decoder", "type": "AudioDecoder"}},
                     {
                         "operation": {
                             "name": "STFT",
@@ -511,27 +504,23 @@ def save_processing(
                                     0.000986635684967041,
                                     0.0005550682544708252,
                                     0.0002467334270477295,
-                                    0.0000616908073425293
-                                ]
-                            }
+                                    0.0000616908073425293,
+                                ],
+                            },
                         }
                     },
                     {
                         "operation": {
                             "name": "log_mel_spectrogram",
                             "type": "LogMelSpectrum",
-                            "attrs": {
-                                "chunk_size": 30,
-                                "hop_length": 160,
-                                "n_fft": 400,
-                                "n_mel": 80
-                            }
+                            "attrs": {"chunk_size": 30, "hop_length": 160, "n_fft": 400, "n_mel": config.num_mel_bins},
                         }
-                    }
+                    },
                 ]
             }
         }
-        """)
+        audio_processor_json = json.dumps(audio_processor_cfg, indent=4)
+
         with open(os.path.join(output_dir, "audio_processor_config.json"), "w") as f:
             f.write(audio_processor_json)