
Commit 4ce31ca

boji123刘柏基 authored and 刘柏基 committed
[debug] a better solution for mismatch of speech feat len and speech token len, refer to #1051
1 parent 587604b · commit 4ce31ca

File tree
  cosyvoice/dataset/processor.py
  cosyvoice/flow/flow.py

2 files changed: +10 -4 lines changed


cosyvoice/dataset/processor.py  (+10 -2)
@@ -159,6 +159,7 @@ def truncate(data, truncate_length=24576, mode='train'):
 
 def compute_fbank(data,
                   feat_extractor,
+                  token_mel_ratio=2,
                   mode='train'):
     """ Extract fbank
 
@@ -174,8 +175,15 @@ def compute_fbank(data,
         assert 'utt' in sample
         assert 'text_token' in sample
         waveform = sample['speech']
-        mat = feat_extractor(waveform).squeeze(dim=0).transpose(0, 1)
-        sample['speech_feat'] = mat
+        feat = feat_extractor(waveform).squeeze(dim=0).transpose(0, 1)
+
+        # padding with replicate mode (align to speech_token len * token_mel_ratio)
+        pad_len = sample["speech_token"].shape[0] * token_mel_ratio - feat.shape[0]
+        if pad_len > 0:
+            feat_to_pad = feat[-1:].repeat((pad_len, 1))
+            feat = torch.cat([feat, feat_to_pad], dim=0)
+
+        sample['speech_feat'] = feat
         yield sample
 
 
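In short, compute_fbank now takes a token_mel_ratio argument (default 2) and pads the extracted fbank by replicating its last frame until its length equals the speech_token length times token_mel_ratio, so the feature and token lengths line up at the source instead of being patched downstream. The snippet below is a minimal standalone sketch of that alignment step; the helper name pad_feat_to_token_len and the toy shapes (47 frames, 80 mel bins, 25 tokens) are made up for illustration and are not part of the repository.

import torch

def pad_feat_to_token_len(feat, num_tokens, token_mel_ratio=2):
    # feat: (T, n_mels) fbank matrix; target length is num_tokens * token_mel_ratio
    pad_len = num_tokens * token_mel_ratio - feat.shape[0]
    if pad_len > 0:
        # replicate the last frame pad_len times and append along the time axis,
        # mirroring the "replicate" padding added in compute_fbank above
        feat = torch.cat([feat, feat[-1:].repeat(pad_len, 1)], dim=0)
    return feat

# toy example: 25 speech tokens at token_mel_ratio=2 call for 50 frames,
# but the extractor produced only 47, so the last frame is repeated 3 times
feat = torch.randn(47, 80)
aligned = pad_feat_to_token_len(feat, num_tokens=25, token_mel_ratio=2)
assert aligned.shape == (50, 80)

As in the diff, a feature that already reaches or exceeds the target length is returned unchanged; only the too-short case is padded.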

cosyvoice/flow/flow.py  (-2)
@@ -92,7 +92,6 @@ def forward(
 
         mask = (~make_pad_mask(feat_len)).to(h)
         # NOTE this is unnecessary, feat/h already same shape
-        feat = F.interpolate(feat.unsqueeze(dim=1), size=h.shape[1:], mode="nearest").squeeze(dim=1)
         loss, _ = self.decoder.compute_loss(
             feat.transpose(1, 2).contiguous(),
             mask.unsqueeze(1),
@@ -214,7 +213,6 @@ def forward(
         h = self.encoder_proj(h)
 
         # get conditions
-        feat = F.interpolate(feat.unsqueeze(dim=1), size=h.shape[1:], mode="nearest").squeeze(dim=1)
         conds = torch.zeros(feat.shape, device=token.device)
         for i, j in enumerate(feat_len):
             if random.random() < 0.5:
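The two deletions in flow.py follow from that padding: once speech_feat is already speech_token length * token_mel_ratio frames long, feat and the token-derived representation h share the same time dimension (as the NOTE comment says), and nearest interpolation to an identical size copies the input unchanged. The check below is a self-contained sketch with invented shapes (batch 2, 50 frames, 80 mel bins), not code taken from flow.py.

import torch
import torch.nn.functional as F

B, T, n_mels = 2, 50, 80          # invented batch size, frame count, mel bins
feat = torch.randn(B, T, n_mels)  # fbank features already aligned to T frames
h = torch.randn(B, T, n_mels)     # token-derived hidden state with the same shape

# the removed call: resize feat to h's (T, n_mels) size with nearest interpolation
resized = F.interpolate(feat.unsqueeze(dim=1), size=h.shape[1:], mode="nearest").squeeze(dim=1)

# with matching shapes the interpolation maps every frame to itself,
# so dropping the call changes nothing
assert resized.shape == feat.shape
assert torch.equal(resized, feat)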
