Skip to content

Commit b066529

Browse files
committed
HFA: fix fill small gap
1 parent eb357aa commit b066529

5 files changed

Lines changed: 15 additions & 15 deletions

File tree

src/apps/HubertFA/util/AlignWord.cpp

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -23,21 +23,22 @@ namespace HFA {
2323
return a >= b - EPS;
2424
}
2525

26-
Phoneme::Phoneme(const float start, const float end, const std::string &text)
27-
: start(std::max(0.0f, start)), end(std::max(0.0f, end)), text(text) {
26+
Phoneme::Phoneme(const float p_start, const float p_end, const std::string &text)
27+
: start(std::max(0.0f, p_start)), end(std::max(0.0f, p_end)), text(text) {
2828
if (!(0 <= start && start < end)) {
29-
std::cerr << "Warning: Phoneme Invalid: text=" << text << " start=" << start << ", end=" << end
29+
std::cerr << "Warning: Phoneme Invalid: text=" << text << " p_start=" << start << ", p_end=" << end
3030
<< std::endl;
3131
}
3232
}
3333

34-
Word::Word(float start, float end, const std::string &text, const bool init_phoneme)
35-
: start(std::max(0.0f, start)), end(std::max(0.0f, end)), text(text) {
34+
Word::Word(float w_start, float w_end, const std::string &text, const bool init_phoneme)
35+
: start(std::max(0.0f, w_start)), end(std::max(0.0f, w_end)), text(text) {
3636
if (!(0 <= start && start < end)) {
37-
std::cerr << "Warning: Word Invalid: text=" << text << " start=" << start << ", end=" << end << std::endl;
37+
std::cerr << "Warning: Word Invalid: text=" << text << " w_start=" << start << ", w_end=" << end
38+
<< std::endl;
3839
}
3940
if (init_phoneme) {
40-
phonemes.emplace_back(start, end, text);
41+
phonemes.emplace_back(w_start, w_end, text);
4142
}
4243
}
4344

src/apps/HubertFA/util/AlignWord.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ namespace HFA {
1212
float end;
1313
std::string text;
1414

15-
Phoneme(float start, float end, const std::string &text);
15+
Phoneme(float p_start, float p_end, const std::string &text);
1616
};
1717

1818
class Word {
@@ -22,7 +22,7 @@ namespace HFA {
2222
std::string text;
2323
std::vector<Phoneme> phonemes;
2424

25-
Word(float start, float end, const std::string &text, bool init_phoneme = false);
25+
Word(float w_start, float w_end, const std::string &text, bool init_phoneme = false);
2626

2727
float dur() const;
2828
void add_phoneme(const Phoneme &phoneme);

src/apps/HubertFA/util/AlignmentDecoder.cpp

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -208,9 +208,6 @@ namespace HFA {
208208
word_idx_last = word_idx;
209209
}
210210
}
211-
212-
// 填充小间隙
213-
words.fill_small_gaps(wav_length);
214211
return true;
215212
}
216213

src/apps/HubertFA/util/Hfa.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,7 @@ namespace HFA {
7979
sf.seek(0, SEEK_SET);
8080
sf.read(audio.data(), static_cast<sf_count_t>(audio.size()));
8181

82-
const auto wav_length = static_cast<double>(sf.frames()) / hfa_input_sample_rate;
82+
const float wav_length = static_cast<float>(sf.frames()) / hfa_input_sample_rate;
8383

8484
if (wav_length > 60) {
8585
msg = "The audio contains continuous pronunciation segments that exceed 60 seconds. Please manually "
@@ -129,6 +129,7 @@ namespace HFA {
129129
for (const auto &word : word_list)
130130
words.add_AP(word);
131131

132+
words.fill_small_gaps(wav_length);
132133
words.clear_language_prefix();
133134
words.add_SP(wav_length, "SP");
134135
return true;

src/apps/HubertFA/util/HfaThread.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -39,9 +39,10 @@ namespace HFA {
3939
auto tierPhones = std::make_shared<textgrid::IntervalTier>("phonemes", 0.0, words.duration());
4040

4141
for (auto word : words) {
42-
tierWords->AppendInterval(textgrid::Interval(word->start, word->end, word->text));
42+
tierWords->AppendInterval(textgrid::Interval(std::max(0.0f, word->start), word->end, word->text));
4343
for (auto phoneme : word->phonemes) {
44-
tierPhones->AppendInterval(textgrid::Interval(phoneme.start, phoneme.end, phoneme.text));
44+
tierPhones->AppendInterval(
45+
textgrid::Interval(std::max(0.0f, phoneme.start), phoneme.end, phoneme.text));
4546
}
4647
}
4748

0 commit comments

Comments
 (0)