Skip to content

Commit a9a87e9

Browse files
committed
pitch preprocessing update
1 parent 752e516 commit a9a87e9

File tree

3 files changed

+23
-6
lines changed

3 files changed

+23
-6
lines changed

.gitignore

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,3 +25,11 @@ idea/*
2525
dataset/audio/__pycache__/__init__.cpython-36.pyc
2626
*.pyc
2727
Untitled.ipynb
28+
mel.npy
29+
*.png
30+
*.npy
31+
Testing/2log_v2/no_exp_before_bins_fs2v2_2_31k_test_tts.wav
32+
Testing/exp_log/test_tts.wav
33+
Testing/exp_log_v2/exp_before_bins_fs2v2_2_31k_test_tts.wav
34+
mel.png
35+
mel.npy

core/variance_predictor.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -278,7 +278,9 @@ def inference(self, xs: torch.Tensor, olens = None, alpha: float = 1.0):
278278
"""
279279
f0_spec, f0_mean, f0_std = self.forward(xs, olens, x_masks=None) # (B, Tmax, 10)
280280
f0_reconstructed = self.inverse(f0_spec, f0_mean, f0_std)
281-
281+
#print(f0_reconstructed)
282+
f0_reconstructed = torch.exp(f0_reconstructed)
283+
#print(f0_reconstructed)
282284
return self.to_one_hot(f0_reconstructed)
283285

284286
def to_one_hot(self, x: torch.Tensor):

dataset/audio/pitch.py

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -79,15 +79,20 @@ def forward(
7979
# F0 extraction
8080

8181
# input shape = [T,]
82-
pitch = self._calculate_f0(input)
82+
pitch, pitch_log = self._calculate_f0(input)
8383
# (Optional): Adjust length to match with the mel-spectrogram
8484
if feats_lengths is not None:
8585
pitch = [
8686
self._adjust_num_frames(p, fl).view(-1)
8787
for p, fl in zip(pitch, feats_lengths)
8888
]
89-
pitch, mean, std = self._normalize(pitch, durations)
90-
coefs = self._cwt(pitch.numpy())
89+
pitch_log = [
90+
self._adjust_num_frames(p, fl).view(-1)
91+
for p, fl in zip(pitch_log, feats_lengths)
92+
]
93+
94+
pitch_log_norm, mean, std = self._normalize(pitch_log, durations)
95+
coefs = self._cwt(pitch_log_norm.numpy())
9196
# (Optional): Average by duration to calculate token-wise f0
9297
if self.use_token_averaged_f0:
9398
pitch = self._average_by_duration(pitch, durations)
@@ -112,10 +117,12 @@ def _calculate_f0(self, input: torch.Tensor) -> torch.Tensor:
112117
f0 = pyworld.stonemask(x, f0, timeaxis, self.fs)
113118
if self.use_continuous_f0:
114119
f0 = self._convert_to_continuous_f0(f0)
120+
115121
if self.use_log_f0:
116122
nonzero_idxs = np.where(f0 != 0)[0]
117-
f0[nonzero_idxs] = np.log(f0[nonzero_idxs])
118-
return input.new_tensor(f0.reshape(-1), dtype=torch.float)
123+
f0_log[nonzero_idxs] = np.log(f0[nonzero_idxs])
124+
125+
return input.new_tensor(f0.reshape(-1), dtype=torch.float), input.new_tensor(f0_log.reshape(-1), dtype=torch.float)
119126

120127

121128
@staticmethod

0 commit comments

Comments
 (0)