22import librosa .filters
33import math
44import numpy as np
5+ import tensorflow as tf
56from scipy import signal
67from hparams import hparams
78
@@ -15,50 +16,96 @@ def save_wav(wav, path):
1516 librosa .output .write_wav (path , wav .astype (np .int16 ), hparams .sample_rate )
1617
1718
def preemphasis(x):
    '''Applies the pre-emphasis filter to x.

    Runs scipy.signal.lfilter with numerator [1, -hparams.preemphasis] and
    denominator [1], i.e. y[n] = x[n] - hparams.preemphasis * x[n-1].
    '''
    numerator = [1, -hparams.preemphasis]
    denominator = [1]
    return signal.lfilter(numerator, denominator, x)
21+
22+
def inv_preemphasis(x):
    '''Undoes preemphasis on x.

    Runs scipy.signal.lfilter with the numerator and denominator of the
    pre-emphasis filter swapped: numerator [1], denominator
    [1, -hparams.preemphasis].
    '''
    numerator = [1]
    denominator = [1, -hparams.preemphasis]
    return signal.lfilter(numerator, denominator, x)
25+
26+
def spectrogram(y):
    '''Converts waveform y to a normalized dB-scale magnitude spectrogram.

    Steps: pre-emphasis, STFT, magnitude to dB, subtract hparams.ref_level_db,
    then _normalize.
    '''
    stft_matrix = _stft(preemphasis(y))
    magnitude_db = _amp_to_db(np.abs(stft_matrix))
    return _normalize(magnitude_db - hparams.ref_level_db)
2231
2332
def inv_spectrogram(spectrogram):
    '''Converts spectrogram to waveform using librosa'''
    # Undo normalization and the reference-level shift applied by spectrogram()
    level_db = _denormalize(spectrogram) + hparams.ref_level_db
    amplitude = _db_to_amp(level_db)  # Convert back to linear
    waveform = _griffin_lim(amplitude ** hparams.power)  # Reconstruct phase
    return inv_preemphasis(waveform)
37+
38+
def inv_spectrogram_tensorflow(spectrogram):
    '''Builds computational graph to convert spectrogram to waveform using TensorFlow.

    Unlike inv_spectrogram, this does NOT invert the preemphasis. The caller should call
    inv_preemphasis on the output after running the graph.
    '''
    # Undo normalization and the reference-level shift, then go from dB to linear amplitude
    level_db = _denormalize_tensorflow(spectrogram) + hparams.ref_level_db
    amplitude = _db_to_amp_tensorflow(level_db)
    sharpened = tf.pow(amplitude, hparams.power)
    return _griffin_lim_tensorflow(sharpened)
2747
2848
def melspectrogram(y):
    '''Converts waveform y to a normalized dB-scale mel spectrogram.

    Steps: pre-emphasis, STFT, magnitude, _linear_to_mel, magnitude to dB,
    then _normalize. Unlike spectrogram(), no reference level is subtracted.
    '''
    stft_matrix = _stft(preemphasis(y))
    mel_magnitude = _linear_to_mel(np.abs(stft_matrix))
    return _normalize(_amp_to_db(mel_magnitude))
3353
3454
35- def inv_melspectrogram (melspectrogram ):
36- S = _mel_to_linear (_db_to_amp (_denormalize (melspectrogram ))) # Convert back to linear
37- return _inv_preemphasis (_griffin_lim (S ** hparams .power )) # Reconstruct phase
38-
39-
40- # Based on https://github.com/librosa/librosa/issues/434
def _griffin_lim(S):
    '''librosa implementation of Griffin-Lim
    Based on https://github.com/librosa/librosa/issues/434

    Takes the magnitude spectrogram S and returns the waveform produced by
    _istft after one initial inversion with random phase followed by
    hparams.griffin_lim_iters refinement iterations.
    '''
    # Start from uniformly random phase on the unit circle.
    angles = np.exp(2j * np.pi * np.random.rand(*S.shape))
    # np.complex was only an alias for the builtin complex and was removed in
    # NumPy 1.24; np.complex128 is the same dtype and works on all versions.
    S_complex = np.abs(S).astype(np.complex128)
    y = _istft(S_complex * angles)
    for _ in range(hparams.griffin_lim_iters):
        # Keep the target magnitudes, take the phase from the current estimate.
        angles = np.exp(1j * np.angle(_stft(y)))
        y = _istft(S_complex * angles)
    return y
4966
5067
def _griffin_lim_tensorflow(S):
    '''TensorFlow implementation of Griffin-Lim
    Based on https://github.com/Kyubyong/tensorflow-exercises/blob/master/Audio_Processing.ipynb

    Builds graph ops only; the loop below is unrolled hparams.griffin_lim_iters
    times at graph-construction time.
    '''
    with tf.variable_scope('griffinlim'):
        # TensorFlow's stft and istft operate on a batch of spectrograms; create batch of size 1
        batched = tf.expand_dims(S, 0)
        magnitudes = tf.identity(tf.cast(batched, dtype=tf.complex64))
        waveform = _istft_tensorflow(magnitudes)
        for _ in range(hparams.griffin_lim_iters):
            estimate = _stft_tensorflow(waveform)
            # Divide by the magnitude (floored at 1e-8 to avoid division by zero)
            # to keep only the phase of the current estimate.
            floored_magnitude = tf.maximum(1e-8, tf.abs(estimate))
            phase = estimate / tf.cast(floored_magnitude, tf.complex64)
            waveform = _istft_tensorflow(magnitudes * phase)
        return tf.squeeze(waveform, 0)
82+
83+
def _stft(y):
    '''Runs librosa.stft on y with the sizes from _stft_parameters().'''
    fft_size, hop, window = _stft_parameters()
    return librosa.stft(y=y, n_fft=fft_size, hop_length=hop, win_length=window)
5687
5788
def _istft(y):
    '''Runs librosa.istft on y with the hop and window sizes from _stft_parameters().'''
    # The FFT size is not passed to librosa.istft, so it is discarded here.
    _, hop, window = _stft_parameters()
    return librosa.istft(y, hop_length=hop, win_length=window)
92+
93+
def _stft_tensorflow(signals):
    '''Builds a tf.contrib.signal.stft op over signals using _stft_parameters().'''
    fft_size, hop, window = _stft_parameters()
    return tf.contrib.signal.stft(
        signals,
        frame_length=window,
        frame_step=hop,
        fft_length=fft_size,
        pad_end=False)
97+
98+
def _istft_tensorflow(stfts):
    '''Builds a tf.contrib.signal.inverse_stft op over stfts using _stft_parameters().'''
    fft_size, hop, window = _stft_parameters()
    return tf.contrib.signal.inverse_stft(
        stfts,
        frame_length=window,
        frame_step=hop,
        fft_length=fft_size)
102+
103+
def _stft_parameters():
    '''Derives the STFT sizes from hparams.

    Returns a tuple (n_fft, hop_length, win_length). n_fft is computed from
    hparams.num_freq; hop_length and win_length are hparams.frame_shift_ms and
    hparams.frame_length_ms converted to a sample count and truncated to int.
    '''
    sample_rate = hparams.sample_rate
    n_fft = 2 * (hparams.num_freq - 1)
    hop_length = int(hparams.frame_shift_ms / 1000 * sample_rate)
    win_length = int(hparams.frame_length_ms / 1000 * sample_rate)
    return n_fft, hop_length, win_length
62109
63110
64111# Conversions:
@@ -88,14 +135,14 @@ def _amp_to_db(x):
def _db_to_amp(x):
    '''Converts decibel values x to linear amplitude: 10 ** (x / 20).'''
    exponent = x * 0.05
    return np.power(10.0, exponent)
90137
91- def _preemphasis (x ):
92- return signal .lfilter ([1 , - hparams .preemphasis ], [1 ], x )
93-
94- def _inv_preemphasis (x ):
95- return signal .lfilter ([1 ], [1 , - hparams .preemphasis ], x )
def _db_to_amp_tensorflow(x):
    '''TensorFlow counterpart of _db_to_amp: raises 10 to the power x * 0.05.'''
    # The base is built as a tensor of tens with the same shape as x.
    base = tf.ones(tf.shape(x)) * 10.0
    exponent = x * 0.05
    return tf.pow(base, exponent)
96140
def _normalize(S):
    '''Rescales S by hparams.min_level_db and clips the result to [0, 1].

    Computes (S - min_level_db) / -min_level_db, so S == min_level_db maps to
    0 and S == 0 maps to 1.
    '''
    floor_db = hparams.min_level_db
    scaled = (S - floor_db) / -floor_db
    return np.clip(scaled, 0, 1)
99143
def _denormalize(S):
    '''Inverse of _normalize: clips S to [0, 1] and maps it back to dB.

    0 maps to hparams.min_level_db and 1 maps to 0.
    '''
    floor_db = hparams.min_level_db
    clipped = np.clip(S, 0, 1)
    return (clipped * -floor_db) + floor_db
146+
def _denormalize_tensorflow(S):
    '''TensorFlow counterpart of _denormalize.

    Clips S to [0, 1] with tf.clip_by_value, then maps 0 to
    hparams.min_level_db and 1 to 0.
    '''
    floor_db = hparams.min_level_db
    clipped = tf.clip_by_value(S, 0, 1)
    return (clipped * -floor_db) + floor_db
0 commit comments