Pitch preprocessing update: normalize and apply CWT to log-F0, and exponentiate the reconstructed F0 at inference

carankt · carankt · commit a9a87e9f771e · 2020-12-02T12:42:11.000+05:30
diff --git a/.gitignore b/.gitignore
@@ -25,3 +25,11 @@ idea/*
 dataset/audio/__pycache__/__init__.cpython-36.pyc
 *.pyc
 Untitled.ipynb
+mel.npy
+*.png
+*.npy
+Testing/2log_v2/no_exp_before_bins_fs2v2_2_31k_test_tts.wav
+Testing/exp_log/test_tts.wav
+Testing/exp_log_v2/exp_before_bins_fs2v2_2_31k_test_tts.wav
+mel.png
+mel.npy
diff --git a/core/variance_predictor.py b/core/variance_predictor.py
@@ -278,7 +278,9 @@ def inference(self, xs: torch.Tensor, olens = None, alpha: float = 1.0):
         """
         f0_spec, f0_mean, f0_std = self.forward(xs, olens, x_masks=None)  # (B, Tmax, 10)
         f0_reconstructed = self.inverse(f0_spec, f0_mean, f0_std)
-
+        #print(f0_reconstructed)
+        f0_reconstructed = torch.exp(f0_reconstructed)
+        #print(f0_reconstructed)
         return self.to_one_hot(f0_reconstructed)
 
     def to_one_hot(self, x: torch.Tensor):
diff --git a/dataset/audio/pitch.py b/dataset/audio/pitch.py
@@ -79,15 +79,20 @@ def forward(
         # F0 extraction
 
         # input shape = [T,]
-        pitch = self._calculate_f0(input)
+        pitch, pitch_log = self._calculate_f0(input)
         # (Optional): Adjust length to match with the mel-spectrogram
         if feats_lengths is not None:
             pitch = [
                 self._adjust_num_frames(p, fl).view(-1)
                 for p, fl in zip(pitch, feats_lengths)
             ]
-        pitch, mean, std = self._normalize(pitch, durations)
-        coefs = self._cwt(pitch.numpy())
+            pitch_log = [
+                self._adjust_num_frames(p, fl).view(-1)
+                for p, fl in zip(pitch_log, feats_lengths)
+            ]
+
+        pitch_log_norm, mean, std = self._normalize(pitch_log, durations)
+        coefs = self._cwt(pitch_log_norm.numpy())
         # (Optional): Average by duration to calculate token-wise f0
         if self.use_token_averaged_f0:
             pitch = self._average_by_duration(pitch, durations)
@@ -112,10 +117,12 @@ def _calculate_f0(self, input: torch.Tensor) -> torch.Tensor:
         f0 = pyworld.stonemask(x, f0, timeaxis, self.fs)
         if self.use_continuous_f0:
             f0 = self._convert_to_continuous_f0(f0)
+
         if self.use_log_f0:
             nonzero_idxs = np.where(f0 != 0)[0]
-            f0[nonzero_idxs] = np.log(f0[nonzero_idxs])
-        return input.new_tensor(f0.reshape(-1), dtype=torch.float)
+            f0_log[nonzero_idxs] = np.log(f0[nonzero_idxs])
+
+        return input.new_tensor(f0.reshape(-1), dtype=torch.float), input.new_tensor(f0_log.reshape(-1), dtype=torch.float)
 
 
     @staticmethod