@@ -79,15 +79,20 @@ def forward(
79
79
# F0 extraction
80
80
81
81
# input shape = [T,]
82
- pitch = self ._calculate_f0 (input )
82
+ pitch , pitch_log = self ._calculate_f0 (input )
83
83
# (Optional): Adjust length to match with the mel-spectrogram
84
84
if feats_lengths is not None :
85
85
pitch = [
86
86
self ._adjust_num_frames (p , fl ).view (- 1 )
87
87
for p , fl in zip (pitch , feats_lengths )
88
88
]
89
- pitch , mean , std = self ._normalize (pitch , durations )
90
- coefs = self ._cwt (pitch .numpy ())
89
+ pitch_log = [
90
+ self ._adjust_num_frames (p , fl ).view (- 1 )
91
+ for p , fl in zip (pitch_log , feats_lengths )
92
+ ]
93
+
94
+ pitch_log_norm , mean , std = self ._normalize (pitch_log , durations )
95
+ coefs = self ._cwt (pitch_log_norm .numpy ())
91
96
# (Optional): Average by duration to calculate token-wise f0
92
97
if self .use_token_averaged_f0 :
93
98
pitch = self ._average_by_duration (pitch , durations )
@@ -112,10 +117,12 @@ def _calculate_f0(self, input: torch.Tensor) -> torch.Tensor:
112
117
f0 = pyworld .stonemask (x , f0 , timeaxis , self .fs )
113
118
if self .use_continuous_f0 :
114
119
f0 = self ._convert_to_continuous_f0 (f0 )
120
+
115
121
if self .use_log_f0 :
116
122
nonzero_idxs = np .where (f0 != 0 )[0 ]
117
- f0 [nonzero_idxs ] = np .log (f0 [nonzero_idxs ])
118
- return input .new_tensor (f0 .reshape (- 1 ), dtype = torch .float )
123
+ f0_log [nonzero_idxs ] = np .log (f0 [nonzero_idxs ])
124
+
125
+ return input .new_tensor (f0 .reshape (- 1 ), dtype = torch .float ), input .new_tensor (f0_log .reshape (- 1 ), dtype = torch .float )
119
126
120
127
121
128
@staticmethod
0 commit comments