-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathdenoising.py
More file actions
110 lines (87 loc) · 4.35 KB
/
Copy pathdenoising.py
File metadata and controls
110 lines (87 loc) · 4.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
from model import *
from data_import import *
import sys, getopt
from scipy.io import wavfile
from scipy import signal
import os
class Denoising():
    """
    Denoising Class holds all the necessary functions for denoising the noisy samples.

    Typical use is ``sampling()`` followed by ``inference()``.

    Attributes:
        modfolder (string) : Folder from which ``se_model.ckpt`` is restored
        noisy_speech_folder (string) : Folder holding the input noisy ``.wav`` files
        sampled_noisy_speech_folder (string) : Folder receiving the resampled files;
            ``inference()`` reads from it and writes to ``<folder>_denoised``
    """

    # Bit depth used for scaling each integer sample type accepted by sampling().
    _PCM_BITS = {'int16': 16, 'int32': 32}

    def __init__(self, noisy_speech_folder='', sampled_noisy_speech_folder='', modfolder=''):
        self.modfolder = modfolder
        self.noisy_speech_folder = noisy_speech_folder
        self.sampled_noisy_speech_folder = sampled_noisy_speech_folder

    @staticmethod
    def _to_float(y):
        """
        Converts raw WAV samples to floating point values.
        Args:
            y (numpy.ndarray) : Samples as returned by scipy.io.wavfile.read
        Returns:
            numpy.ndarray of Python-float (float64) samples
        Raises:
            ValueError : If the sample type is not int16, int32 or floating point
        """
        if y.dtype.kind == 'f':
            # Floating point samples are not rescaled, only widened to float64.
            return y.astype(float)
        nb_bits = Denoising._PCM_BITS.get(y.dtype.name)
        if nb_bits is None:
            # Previously this case left nb_bits unbound (NameError) or silently
            # reused the bit depth of the previous file.
            raise ValueError("Unsupported WAV sample type: %s" % y.dtype)
        # NOTE(review): the divisor is 2**(nb_bits-1) + 1 (32769 for 16-bit), not the
        # usual 2**(nb_bits-1). Kept unchanged so the scale fed to the model does not
        # shift - confirm whether this offset is intentional.
        return y.astype(float) / (2.0**(nb_bits-1) + 1)

    # SAMPLING FUNCTION
    def sampling(self):
        """
        Converts the input noisy audio files into required format and samples it to 16kHz.

        Every ``.wav`` file in ``noisy_speech_folder`` is converted to floating point,
        resampled and written under the same name to ``sampled_noisy_speech_folder``
        (created if missing).
        Raises:
            ValueError : If a file holds samples that are neither int16/int32 nor float
        """
        fs = 16000
        source_dir = self.noisy_speech_folder
        target_dir = self.sampled_noisy_speech_folder
        filelist = [f for f in os.listdir(source_dir) if f.endswith(".wav")]
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        for name in tqdm(filelist):
            sr, y = wavfile.read(os.path.join(source_dir, name))
            y_float = self._to_float(y)
            # Number of output samples. float() forces true division so that round()
            # is effective on Python 2 as well (int/int would floor first).
            samples = int(round(len(y_float) * float(fs) / sr))
            Y = signal.resample(y_float, samples)
            wavfile.write(os.path.join(target_dir, name), fs, Y)
        print("Converted all the input noisy samples to required format. The corresponding sampled audio files are present in the specified folder.")

    # INFERENCE FUNCTION
    def inference(self, SE_LAYERS = 13, SE_CHANNELS = 64, SE_NORM = "NM", fs = 16000):
        """
        Denoises the noisy samples and produces the corresponding denoised samples in the specified path.

        Input is read from ``sampled_noisy_speech_folder``; output is written to a
        sibling folder named ``<sampled_noisy_speech_folder>_denoised``.
        Args:
            SE_LAYERS (int) : Number of Internal Layers of the SENET model
            SE_CHANNELS (int) : Number of feature channels per layer
            SE_NORM (string) : Type of layer normalization (NM, SBN or None)
            fs (int) : Sampling frequency or rate
        """
        datafolder = self.sampled_noisy_speech_folder
        # endswith() is safe for an empty folder name, unlike indexing with [-1].
        if datafolder.endswith('/'):
            datafolder = datafolder[:-1]
        outfolder = datafolder + '_denoised'
        if not os.path.exists(outfolder):
            os.makedirs(outfolder)

        # LOAD DATA
        dataset = load_noisy_data_list(valfolder = datafolder)
        dataset = load_noisy_data(dataset)

        # SET PLACEHOLDER AND BUILD THE ENHANCEMENT NETWORK
        with tf.variable_scope(tf.get_variable_scope()):
            noisy = tf.placeholder(tf.float32, shape=[None,1,None,1])
            enhanced = senet(noisy, n_layers=SE_LAYERS, norm_type=SE_NORM, n_channels=SE_CHANNELS)

        # INITIALIZE GPU CONFIG
        config = tf.ConfigProto()
        # config.gpu_options.allow_growth=True

        # The with-block closes the session (and releases its resources) even if
        # restoring the checkpoint or writing a file fails.
        with tf.Session(config=config) as sess:
            print("Config ready")
            sess.run(tf.global_variables_initializer())
            print("Session initialized")

            # Only variables whose name starts with "se_" are restored from the checkpoint.
            saver = tf.train.Saver([var for var in tf.trainable_variables() if var.name.startswith("se_")])
            saver.restore(sess, "%s/se_model.ckpt" % self.modfolder)

            # NON-RANDOMIZED ITERATION OVER ALL LOADED FILES
            for i in tqdm(range(len(dataset["innames"]))):
                inputData = dataset["inaudio"][i] # LOAD DEGRADED INPUT
                # VALIDATION ITERATION
                output = sess.run([enhanced], feed_dict={noisy: inputData})
                output = np.reshape(output, -1) # flatten to a 1-D signal
                wavfile.write(os.path.join(outfolder, dataset["shortnames"][i]), fs, output)
        print("Denoised samples of the corresponding noisy samples have been created in the mentioned folder.")
# MAIN (example usage, kept commented out)
# if __name__ == '__main__':
#     noisy_speech_folder = 'datasets/noisy_speech'
#     sampled_noisy_speech_folder = 'datasets/sampled_noisy_speech'
#     modfolder = "models"
#     denoise = Denoising(noisy_speech_folder=noisy_speech_folder, sampled_noisy_speech_folder=sampled_noisy_speech_folder, modfolder=modfolder)
#     denoise.sampling()
#     denoise.inference()
#     NOTE(review): the two lines below do not match the signature of Denoising.inference;
#     they look like a leftover from an earlier module-level function - confirm before enabling.
#     datafolder = sampled_noisy_speech_folder
#     inference(valfolder=datafolder, modfolder=modfolder)