Skip to content

Commit 33debbe

Browse files
authored
Update number of mel bins for whisper model (microsoft#25675)
### Description

<!-- Describe your changes. -->

Update the number of mel bins used for the Whisper model, because it differs between Whisper model versions. Without this change, Whisper v3 models cannot be run, since their `num_mel_bins` is 128.

### Motivation and Context

<!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. -->

Whisper v3 models currently cannot be run: they use `num_mel_bins` = 128, but the value is hard-coded to 80, which causes an issue during preprocessing.
1 parent 9c9e3a6 commit 33debbe

File tree

1 file changed

+9
-20
lines changed

1 file changed

+9
-20
lines changed

onnxruntime/python/tools/transformers/models/whisper/whisper_helper.py

Lines changed: 9 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,6 @@
66
import json
77
import logging
88
import os
9-
import textwrap
109
from pathlib import Path
1110

1211
import numpy as np
@@ -93,16 +92,10 @@ def save_processing(
9392
if separate_encoder_and_decoder_init:
9493
return
9594

96-
audio_processor_json = textwrap.dedent("""\
97-
{
95+
audio_processor_cfg = {
9896
"feature_extraction": {
9997
"sequence": [
100-
{
101-
"operation": {
102-
"name": "audio_decoder",
103-
"type": "AudioDecoder"
104-
}
105-
},
98+
{"operation": {"name": "audio_decoder", "type": "AudioDecoder"}},
10699
{
107100
"operation": {
108101
"name": "STFT",
@@ -511,27 +504,23 @@ def save_processing(
511504
0.000986635684967041,
512505
0.0005550682544708252,
513506
0.0002467334270477295,
514-
0.0000616908073425293
515-
]
516-
}
507+
0.0000616908073425293,
508+
],
509+
},
517510
}
518511
},
519512
{
520513
"operation": {
521514
"name": "log_mel_spectrogram",
522515
"type": "LogMelSpectrum",
523-
"attrs": {
524-
"chunk_size": 30,
525-
"hop_length": 160,
526-
"n_fft": 400,
527-
"n_mel": 80
528-
}
516+
"attrs": {"chunk_size": 30, "hop_length": 160, "n_fft": 400, "n_mel": config.num_mel_bins},
529517
}
530-
}
518+
},
531519
]
532520
}
533521
}
534-
""")
522+
audio_processor_json = json.dumps(audio_processor_cfg, indent=4)
523+
535524
with open(os.path.join(output_dir, "audio_processor_config.json"), "w") as f:
536525
f.write(audio_processor_json)
537526

0 commit comments

Comments
 (0)