Skip to content

Commit dba2ee3

Browse files
committed
fix phone
1 parent 8132134 commit dba2ee3

7 files changed

Lines changed: 83 additions & 88 deletions

File tree

src/apps/HubertFA/util/AlignWord.cpp

Lines changed: 53 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -7,77 +7,77 @@
77

88
namespace HFA {
99

10-
Phoneme::Phoneme(const float start, const float end, const std::string &text)
10+
Phone::Phone(const float start, const float end, const std::string &text)
1111
: start(std::max(0.0f, start)), end(end), text(text) {
1212
if (!(this->start < this->end)) {
13-
const std::string error_msg = "Phoneme Invalid: text=" + text + " start=" + std::to_string(this->start) +
13+
const std::string error_msg = "Phone Invalid: text=" + text + " start=" + std::to_string(this->start) +
1414
", end=" + std::to_string(this->end);
1515
throw std::runtime_error(error_msg);
1616
}
1717
}
1818

19-
Word::Word(const float start, const float end, const std::string &text, const bool init_phoneme)
19+
Word::Word(const float start, const float end, const std::string &text, const bool init_phone)
2020
: start(std::max(0.0f, start)), end(end), text(text) {
2121
if (!(this->start < this->end)) {
2222
const std::string error_msg = "Word Invalid: text=" + text + " start=" + std::to_string(this->start) +
2323
", end=" + std::to_string(this->end);
2424
throw std::runtime_error(error_msg);
2525
}
2626

27-
if (init_phoneme) {
28-
phonemes.emplace_back(this->start, this->end, this->text);
27+
if (init_phone) {
28+
phones.emplace_back(this->start, this->end, this->text);
2929
}
3030
}
3131

3232
float Word::dur() const {
3333
return end - start;
3434
}
3535

36-
void Word::add_phoneme(const Phoneme &phoneme) {
37-
if (phoneme.start == phoneme.end) {
38-
const std::string warning_msg = phoneme.text + " phoneme长度为0,非法";
36+
void Word::add_phone(const Phone &phone) {
37+
if (phone.start == phone.end) {
38+
const std::string warning_msg = phone.text + " phone长度为0,非法";
3939
_add_log("WARNING: " + warning_msg);
4040
return;
4141
}
42-
if (phoneme.start >= start && phoneme.end <= end) {
43-
phonemes.push_back(phoneme);
42+
if (phone.start >= start && phone.end <= end) {
43+
phones.push_back(phone);
4444
} else {
45-
const std::string warning_msg = phoneme.text + ": phoneme边界超出word,添加失败";
45+
const std::string warning_msg = phone.text + ": phone边界超出word,添加失败";
4646
_add_log("WARNING: " + warning_msg);
4747
}
4848
}
4949

50-
void Word::append_phoneme(const Phoneme &phoneme) {
51-
if (phoneme.start == phoneme.end) {
52-
const std::string warning_msg = phoneme.text + " phoneme长度为0,非法";
50+
void Word::append_phone(const Phone &phone) {
51+
if (phone.start == phone.end) {
52+
const std::string warning_msg = phone.text + " phone长度为0,非法";
5353
_add_log("WARNING: " + warning_msg);
5454
return;
5555
}
5656

57-
if (phonemes.empty()) {
58-
if (std::abs(phoneme.start - start) < 1e-6) {
59-
phonemes.push_back(phoneme);
60-
end = phoneme.end;
57+
if (phones.empty()) {
58+
if (std::abs(phone.start - start) < 1e-6) {
59+
phones.push_back(phone);
60+
end = phone.end;
6161
} else {
62-
const std::string warning_msg = phoneme.text + ": phoneme左边界超出word,添加失败";
62+
const std::string warning_msg = phone.text + ": phone左边界超出word,添加失败";
6363
_add_log("WARNING: " + warning_msg);
6464
}
6565
} else {
66-
if (std::abs(phoneme.start - phonemes.back().end) < 1e-6) {
67-
phonemes.push_back(phoneme);
68-
end = phoneme.end;
66+
if (std::abs(phone.start - phones.back().end) < 1e-6) {
67+
phones.push_back(phone);
68+
end = phone.end;
6969
} else {
70-
const std::string warning_msg = phoneme.text + ": phoneme添加失败";
70+
const std::string warning_msg = phone.text + ": phone添加失败";
7171
_add_log("WARNING: " + warning_msg);
7272
}
7373
}
7474
}
7575

7676
void Word::move_start(float new_start) {
7777
new_start = std::max(0.0f, new_start);
78-
if (0 <= new_start && new_start < phonemes[0].end) {
78+
if (0 <= new_start && new_start < phones[0].end) {
7979
start = new_start;
80-
phonemes[0].start = new_start;
80+
phones[0].start = new_start;
8181
} else {
8282
const std::string warning_msg = text + ": start >= first_phone_end,无法调整word边界";
8383
_add_log("WARNING: " + warning_msg);
@@ -86,9 +86,9 @@ namespace HFA {
8686

8787
void Word::move_end(float new_end) {
8888
new_end = std::max(0.0f, new_end);
89-
if (new_end > phonemes.back().start && new_end >= 0) {
89+
if (new_end > phones.back().start && new_end >= 0) {
9090
end = new_end;
91-
phonemes.back().end = new_end;
91+
phones.back().end = new_end;
9292
} else {
9393
const std::string warning_msg = text + ": new_end <= first_phone_start,无法调整word边界";
9494
_add_log("WARNING: " + warning_msg);
@@ -165,7 +165,7 @@ namespace HFA {
165165
}
166166

167167
void WordList::append(const Word &word) {
168-
if (word.phonemes.empty()) {
168+
if (word.phones.empty()) {
169169
const std::string warning_msg = word.text + ": phones为空,非法word";
170170
_add_log("WARNING: " + warning_msg);
171171
return;
@@ -186,8 +186,8 @@ namespace HFA {
186186

187187
void WordList::add_AP(const Word &new_word, float min_dur) {
188188
try {
189-
if (new_word.phonemes.empty()) {
190-
const std::string warning_msg = new_word.text + " phonemes为空,非法word";
189+
if (new_word.phones.empty()) {
190+
const std::string warning_msg = new_word.text + " phones为空,非法word";
191191
_add_log("WARNING: " + warning_msg);
192192
return;
193193
}
@@ -311,10 +311,10 @@ namespace HFA {
311311
}
312312
}
313313

314-
std::vector<std::string> WordList::phonemes() const {
314+
std::vector<std::string> WordList::phones() const {
315315
std::vector<std::string> result;
316316
for (const auto &word : words_) {
317-
for (const auto &ph : word.phonemes) {
317+
for (const auto &ph : word.phones) {
318318
result.push_back(ph.text);
319319
}
320320
}
@@ -332,10 +332,10 @@ namespace HFA {
332332

333333
void WordList::clear_language_prefix() {
334334
for (auto &word : words_) {
335-
for (auto &phoneme : word.phonemes) {
336-
const size_t pos = phoneme.text.find_last_of('/');
335+
for (auto &phone : word.phones) {
336+
const size_t pos = phone.text.find_last_of('/');
337337
if (pos != std::string::npos) {
338-
phoneme.text = phoneme.text.substr(pos + 1);
338+
phone.text = phone.text.substr(pos + 1);
339339
}
340340
}
341341
}
@@ -357,44 +357,43 @@ namespace HFA {
357357
return false;
358358
}
359359

360-
if (word.phonemes.empty()) {
361-
const std::string warning_msg = "Word '" + word.text + "' has no phonemes";
360+
if (word.phones.empty()) {
361+
const std::string warning_msg = "Word '" + word.text + "' has no phones";
362362
_add_log("WARNING: " + warning_msg);
363363
return false;
364364
}
365365

366-
if (std::abs(word.phonemes[0].start - word.start) > 1e-6) {
367-
const std::string warning_msg = "Word '" + word.text + "' first phoneme start(" +
368-
std::to_string(word.phonemes[0].start) + ") != word start(" +
366+
if (std::abs(word.phones[0].start - word.start) > 1e-6) {
367+
const std::string warning_msg = "Word '" + word.text + "' first phone start(" +
368+
std::to_string(word.phones[0].start) + ") != word start(" +
369369
std::to_string(word.start) + ")";
370370
_add_log("WARNING: " + warning_msg);
371371
return false;
372372
}
373373

374-
if (std::abs(word.phonemes.back().end - word.end) > 1e-6) {
375-
const std::string warning_msg = "Word '" + word.text + "' last phoneme end(" +
376-
std::to_string(word.phonemes.back().end) + ") != word end(" +
374+
if (std::abs(word.phones.back().end - word.end) > 1e-6) {
375+
const std::string warning_msg = "Word '" + word.text + "' last phone end(" +
376+
std::to_string(word.phones.back().end) + ") != word end(" +
377377
std::to_string(word.end) + ")";
378378
_add_log("WARNING: " + warning_msg);
379379
return false;
380380
}
381381

382-
for (size_t j = 0; j < word.phonemes.size(); j++) {
383-
if (!(word.phonemes[j].start < word.phonemes[j].end)) {
382+
for (size_t j = 0; j < word.phones.size(); j++) {
383+
if (!(word.phones[j].start < word.phones[j].end)) {
384384
const std::string warning_msg =
385-
"Word '" + word.text + "' phoneme '" + word.phonemes[j].text +
386-
"' has invalid time order: start=" + std::to_string(word.phonemes[j].start) +
387-
", end=" + std::to_string(word.phonemes[j].end);
385+
"Word '" + word.text + "' phone '" + word.phones[j].text +
386+
"' has invalid time order: start=" + std::to_string(word.phones[j].start) +
387+
", end=" + std::to_string(word.phones[j].end);
388388
_add_log("WARNING: " + warning_msg);
389389
return false;
390390
}
391391

392-
if (j < word.phonemes.size() - 1 &&
393-
std::abs(word.phonemes[j].end - word.phonemes[j + 1].start) > 1e-6) {
394-
const std::string warning_msg = "Word '" + word.text + "' phoneme '" + word.phonemes[j].text +
395-
"' end(" + std::to_string(word.phonemes[j].end) +
396-
") != next phoneme '" + word.phonemes[j + 1].text + "' start(" +
397-
std::to_string(word.phonemes[j + 1].start) + ")";
392+
if (j < word.phones.size() - 1 && std::abs(word.phones[j].end - word.phones[j + 1].start) > 1e-6) {
393+
const std::string warning_msg = "Word '" + word.text + "' phone '" + word.phones[j].text +
394+
"' end(" + std::to_string(word.phones[j].end) +
395+
") != next phone '" + word.phones[j + 1].text + "' start(" +
396+
std::to_string(word.phones[j + 1].start) + ")";
398397
_add_log("WARNING: " + warning_msg);
399398
return false;
400399
}

src/apps/HubertFA/util/AlignWord.h

Lines changed: 8 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -6,28 +6,28 @@
66

77
namespace HFA {
88

9-
class Phoneme {
9+
class Phone {
1010
public:
1111
float start;
1212
float end;
1313
std::string text;
1414

15-
Phoneme(float start, float end, const std::string &text);
15+
Phone(float start, float end, const std::string &text);
1616
};
1717

1818
class Word {
1919
public:
2020
float start;
2121
float end;
2222
std::string text;
23-
std::vector<Phoneme> phonemes;
23+
std::vector<Phone> phones;
2424
std::vector<std::string> log;
2525

26-
Word(float start, float end, const std::string &text, bool init_phoneme = false);
26+
Word(float start, float end, const std::string &text, bool init_phone = false);
2727

2828
float dur() const;
29-
void add_phoneme(const Phoneme &phoneme);
30-
void append_phoneme(const Phoneme &phoneme);
29+
void add_phone(const Phone &phone);
30+
void append_phone(const Phone &phoneme);
3131
void move_start(float new_start);
3232
void move_end(float new_end);
3333

@@ -38,7 +38,7 @@ namespace HFA {
3838

3939
class WordList {
4040
std::vector<Word> words_;
41-
std::vector<std::string> log_; // 日志列表
41+
std::vector<std::string> log_;
4242

4343
void _add_log(const std::string &message);
4444
static std::vector<std::pair<float, float>>
@@ -50,7 +50,6 @@ namespace HFA {
5050
WordList(const WordList &) = default;
5151
WordList &operator=(const WordList &) = default;
5252

53-
// 标准容器接口
5453
using iterator = std::vector<Word>::iterator;
5554
using const_iterator = std::vector<Word>::const_iterator;
5655

@@ -122,20 +121,17 @@ namespace HFA {
122121
return 0.0;
123122
}
124123

125-
// 核心功能方法
126124
std::vector<Word> overlapping_words(const Word &new_word) const;
127125
void append(const Word &word);
128126
void add_AP(const Word &new_word, float min_dur = 0.1f);
129127
void fill_small_gaps(float wav_length, float gap_length = 0.1f);
130128
void add_SP(float wav_length, const std::string &add_phone = "SP");
131129

132-
// 获取方法
133-
std::vector<std::string> phonemes() const;
130+
std::vector<std::string> phones() const;
134131
std::vector<std::pair<float, float>> intervals() const;
135132
void clear_language_prefix();
136133
bool check();
137134

138-
// 日志相关方法
139135
std::string get_log() const;
140136
void clear_log();
141137
};

src/apps/HubertFA/util/AlignmentDecoder.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -154,7 +154,7 @@ namespace HFA {
154154
std::vector<int> ph_seq_id;
155155
for (const auto &ph : ph_seq) {
156156
if (vocab_.find(ph) == vocab_.end()) {
157-
msg = "Phoneme '" + ph + "' not found in vocabulary.";
157+
msg = "Phone '" + ph + "' not found in vocabulary.";
158158
return false;
159159
}
160160
ph_seq_id.push_back(vocab_.at(ph));
@@ -292,7 +292,7 @@ namespace HFA {
292292
float start = ph_intervals[i].first;
293293
float end = ph_intervals[i].second;
294294

295-
Phoneme phoneme(start, end, ph_text);
295+
Phone phoneme(start, end, ph_text);
296296

297297
int word_idx;
298298
if (ph_idx_to_word_idx.empty()) {
@@ -313,13 +313,13 @@ namespace HFA {
313313
}
314314

315315
if (word_idx == word_idx_last && has_current_word) {
316-
current_word.append_phoneme(phoneme);
316+
current_word.append_phone(phoneme);
317317
current_word.end = phoneme.end;
318318
} else {
319319
if (has_current_word)
320320
words.append(current_word);
321321
current_word = Word(start, end, word_text);
322-
current_word.add_phoneme(phoneme);
322+
current_word.add_phone(phoneme);
323323
has_current_word = true;
324324
word_idx_last = word_idx;
325325
}

src/apps/HubertFA/util/HfaThread.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -36,11 +36,11 @@ namespace HFA {
3636
textgrid::TextGrid outTg(0.0, words.duration());
3737

3838
auto tierWords = std::make_shared<textgrid::IntervalTier>("words", 0.0, words.duration());
39-
auto tierPhones = std::make_shared<textgrid::IntervalTier>("phonemes", 0.0, words.duration());
39+
auto tierPhones = std::make_shared<textgrid::IntervalTier>("phones", 0.0, words.duration());
4040

4141
for (auto word : words) {
4242
tierWords->AppendInterval(textgrid::Interval(std::max(0.0f, word.start), word.end, word.text));
43-
for (auto phoneme : word.phonemes) {
43+
for (auto phoneme : word.phones) {
4444
tierPhones->AppendInterval(
4545
textgrid::Interval(std::max(0.0f, phoneme.start), phoneme.end, phoneme.text));
4646
}

src/apps/HubertFA/util/NonLexicalDecoder.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -117,7 +117,7 @@ namespace HFA {
117117
const float start_time = start * frame_length_;
118118
const float end_time = end * frame_length_;
119119
auto word = Word(start_time, end_time, tag);
120-
word.add_phoneme(Phoneme(start_time, end_time, tag));
120+
word.add_phone(Phone(start_time, end_time, tag));
121121
words.push_back(word);
122122
}
123123
start = -1;
@@ -131,7 +131,7 @@ namespace HFA {
131131
const float start_time = start * frame_length_;
132132
const float end_time = (prob.size() - 1) * frame_length_;
133133
auto word = Word(start_time, end_time, tag);
134-
word.add_phoneme(Phoneme(start_time, end_time, tag));
134+
word.add_phone(Phone(start_time, end_time, tag));
135135
words.push_back(word);
136136
}
137137

0 commit comments

Comments
 (0)