# speechdenoising/denoising.py at master · kuntojirohan/speechdenoising · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
from model import *
from data_import import *
import sys, getopt
from scipy.io import wavfile
from scipy import signal
import os

class Denoising(object):
    """
    Denoising Class holds all the necessary functions for denoising the noisy samples.

    Intended flow: sampling() converts the raw noisy .wav files to floating
    point audio at the target rate, then inference() runs the speech
    enhancement network over the converted files and writes the results to
    a sibling "<sampled_noisy_speech_folder>_denoised" folder.
    """

    def __init__(self, noisy_speech_folder='', sampled_noisy_speech_folder='', modfolder=''):
        '''
        Stores the folders used by sampling() and inference().
        Args:
            noisy_speech_folder (string) : Folder with the raw noisy .wav files read by sampling()
            sampled_noisy_speech_folder (string) : Folder sampling() writes to and inference() reads from
            modfolder (string) : Folder containing the "se_model.ckpt" checkpoint restored by inference()

        '''

        self.modfolder = modfolder
        self.noisy_speech_folder = noisy_speech_folder
        self.sampled_noisy_speech_folder = sampled_noisy_speech_folder

    @staticmethod
    def _to_float(y):
        '''
        Converts raw WAV samples to 64-bit floating point values.
        Args:
            y (numpy array) : Samples as returned by scipy.io.wavfile.read
        Returns:
            Signed integer PCM scaled by 2**(bits-1) + 1 (same scaling as before
            for 16-bit and 32-bit files); floating point input is passed through
            unscaled.
        Raises:
            ValueError : For any other sample type (e.g. unsigned 8-bit PCM).

        '''

        kind = y.dtype.kind
        if kind == 'f':
            # Floating point WAV data needs no rescaling, only a dtype change.
            return y.astype(float)
        if kind == 'i':
            # Derive the bit depth from the dtype itself so it can never be
            # left undefined or carried over from a previously processed file.
            nb_bits = 8 * y.dtype.itemsize
            return y.astype(float) / (2.0**(nb_bits-1) + 1)
        raise ValueError("Unsupported WAV sample type: %s" % y.dtype)

    # SAMPLING FUNCTION
    def sampling(self, fs=16000):
        '''
        Converts the input noisy audio files into required format and samples it to fs (16kHz by default).
        Args:
            fs (int) : Target sampling frequency or rate

        '''

        filelist = [f for f in os.listdir(self.noisy_speech_folder) if f.endswith(".wav")]
        if not os.path.exists(self.sampled_noisy_speech_folder):
            os.makedirs(self.sampled_noisy_speech_folder)

        for name in tqdm(filelist):
            sr, y = wavfile.read(os.path.join(self.noisy_speech_folder, name))
            # converting to floating point values
            y_float = self._to_float(y)
            # Number of samples after resampling. float() forces true division
            # so the count is rounded rather than floored under Python 2.
            samples = int(round(len(y_float) * float(fs) / sr))
            Y = signal.resample(y_float, samples)
            wavfile.write(os.path.join(self.sampled_noisy_speech_folder, name), fs, Y)

        print("Converted all the input noisy samples to required format. The corresponding sampled audio files are present in the specified folder.")


    # INFERENCE FUNCTION
    def inference(self, SE_LAYERS = 13, SE_CHANNELS = 64, SE_NORM = "NM", fs = 16000):
        '''
        Denoises the noisy samples and produces the corresponding denoised samples in the specified path.
        Output files are written to "<sampled_noisy_speech_folder>_denoised".
        Args:
            SE_LAYERS (int) : Number of Internal Layers of the SENET model
            SE_CHANNELS (int) : Number of feature channels per layer
            SE_NORM (string) : Type of layer normalization (NM, SBN or None)
            fs (int) : Sampling frequency or rate
        Raises:
            ValueError : If sampled_noisy_speech_folder was left empty.

        '''

        datafolder = self.sampled_noisy_speech_folder
        if not datafolder:
            # The constructor default is ''; fail with a clear message instead
            # of an IndexError from indexing an empty string.
            raise ValueError("sampled_noisy_speech_folder must be set before calling inference()")
        if datafolder.endswith('/'):
            datafolder = datafolder[:-1]
        outfolder = datafolder + '_denoised'
        if not os.path.exists(outfolder):
            os.makedirs(outfolder)

        # LOAD DATA
        # NOTE(review): load_noisy_data_list / load_noisy_data are not defined in
        # this file (presumably data_import); the code below relies on the result
        # having "innames", "inaudio" and "shortnames" entries.
        dataset = load_noisy_data_list(valfolder = datafolder)
        dataset = load_noisy_data(dataset)

        # SET PLACEHOLDER AND BUILD THE ENHANCEMENT NETWORK
        with tf.variable_scope(tf.get_variable_scope()):
            noisy = tf.placeholder(tf.float32, shape=[None,1,None,1])
            enhanced = senet(noisy, n_layers=SE_LAYERS, norm_type=SE_NORM, n_channels=SE_CHANNELS)

        # INITIALIZE GPU CONFIG
        config = tf.ConfigProto()
        # config.gpu_options.allow_growth=True

        # The context manager closes the session (and frees its resources)
        # even when restoring the checkpoint or writing a file fails.
        with tf.Session(config=config) as sess:
            print("Config ready")
            sess.run(tf.global_variables_initializer())
            print("Session initialized")
            # Only the trainable variables whose names start with "se_" are restored.
            saver = tf.train.Saver([var for var in tf.trainable_variables() if var.name.startswith("se_")])
            saver.restore(sess, "%s/se_model.ckpt" % self.modfolder)

            # NON-RANDOMIZED ITERATION OVER ALL LOADED FILES
            for idx in tqdm(range(len(dataset["innames"]))):
                inputData = dataset["inaudio"][idx] # LOAD DEGRADED INPUT
                output = sess.run([enhanced], feed_dict={noisy: inputData})
                # Flatten the network output to a 1-D signal before writing.
                output = np.reshape(output, -1)
                wavfile.write(os.path.join(outfolder, dataset["shortnames"][idx]), fs, output)

        print("Denoised samples of the corresponding noisy samples have been created in the mentioned folder.")


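# MAIN
# Example usage, kept commented out so that importing this module has no side effects:
# if __name__ == '__main__':
#     noisy_speech_folder = 'datasets/noisy_speech'
#     sampled_noisy_speech_folder = 'datasets/sampled_noisy_speech'
#     modfolder = "models"
#     denoise = Denoising(noisy_speech_folder=noisy_speech_folder, sampled_noisy_speech_folder=sampled_noisy_speech_folder, modfolder=modfolder)
#     denoise.sampling()
#     denoise.inference()
#     # NOTE(review): the two lines below call a free function inference(), which is
#     # not defined in this file; they look like a leftover from an earlier version.
#     # datafolder = sampled_noisy_speech_folder
#     # inference(valfolder=datafolder, modfolder=modfolder)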