forked from speechbrain/speechbrain
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocess_dynamic_mixing.py
94 lines (79 loc) · 2.67 KB
/
preprocess_dynamic_mixing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
"""
This script resamples a folder which contains audio files.
The files are parsed recursively. A copy of the folder is created with the
same structure, containing the resampled audio files. Each output file is
peak-normalized and has the measured peak appended to its name.
Resampling is performed with scipy.signal.resample_poly; torchaudio is used
for reading and writing the audio.
Author
------
Samuele Cornell, 2020
"""
import argparse
import glob
import os
from pathlib import Path
import numpy as np
import torch
import torchaudio
import tqdm
# from oct2py import octave
from scipy import signal
# Command-line interface of the script.
# The text is passed as ``description`` explicitly: the first positional
# argument of ArgumentParser is ``prog``, which would put the whole sentence
# into the usage line instead of the help description.
parser = argparse.ArgumentParser(
    description=(
        "Utility for resampling all audio files in a folder recursively. "
        "It copies --input_folder to --output_folder and "
        "resamples all audio files with specified format to --fs."
    )
)
parser.add_argument("--input_folder", type=str, required=True)
parser.add_argument("--output_folder", type=str, required=True)
# Parsed as int so the value has the same type whether it comes from the
# default or from the command line.
parser.add_argument(
    "--fs", type=int, default=8000, help="this is the target sampling frequency"
)
# Glob pattern (evaluated relative to --input_folder) selecting the files.
parser.add_argument("--regex", type=str, default="**/*.wav")
def resample_folder(input_folder, output_folder, fs, regex):
    """Resamples and peak-normalizes the audio files within an input folder.

    Every file matching ``regex`` under ``input_folder`` is loaded, only the
    first entry (index 0) of the loaded tensor is kept, and the signal is
    resampled to ``fs`` and divided by its maximum absolute amplitude. The
    result is saved under ``output_folder`` at the same relative location,
    with the measured peak appended to the file name
    (``<stem>_peak_<peak>.wav``).

    Arguments
    ---------
    input_folder : path
        Path of the folder to resample.
    output_folder : path
        Path of the output folder with the resampled data.
    fs : int
        Target sampling frequency.
    regex : str
        Glob pattern (not a regular expression), relative to
        ``input_folder``, selecting the files to process,
        e.g. ``"**/*.wav"``.
    """
    input_root = Path(input_folder)
    files = glob.glob(os.path.join(input_folder, regex), recursive=True)
    for f in tqdm.tqdm(files):
        audio, fs_read = torchaudio.load(f)
        audio = audio[0].numpy()
        audio = signal.resample_poly(audio, fs, fs_read)

        peak = np.max(np.abs(audio))
        # A silent file has a peak of 0; dividing by it would fill the
        # output with NaN, so such a signal is written out unscaled.
        if peak > 0:
            audio = audio / peak
        audio = torch.from_numpy(audio).float()

        # Mirror the position of the file inside the input folder.
        relative_file = Path(f).relative_to(input_root)
        target_dir = Path(output_folder) / relative_file.parent
        os.makedirs(target_dir, exist_ok=True)

        # The peak measured before scaling is kept in the file name.
        target_name = relative_file.stem + "_peak_{}.wav".format(peak)
        torchaudio.save(
            os.path.join(target_dir, target_name), audio.reshape(1, -1), fs
        )
if __name__ == "__main__":
    # Read the command-line options and run the conversion with them.
    cli = parser.parse_args()
    target_fs = int(cli.fs)
    resample_folder(cli.input_folder, cli.output_folder, target_fs, cli.regex)