Skip to content

Commit 8acb179

Browse files
author
LittleMouse
committed
[fix] Handle the situation where either the tagger or the verbalizer file does not exist.
1 parent f775786 commit 8acb179

File tree

2 files changed

+78
-12
lines changed

2 files changed

+78
-12
lines changed

projects/llm_framework/main_melotts/src/main.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -183,10 +183,14 @@ class llm_task {
183183
awake_delay_ = config_body["awake_delay"].get<int>();
184184
else if (file_body["mode_param"].contains("awake_delay"))
185185
awake_delay_ = file_body["mode_param"]["awake_delay"];
186-
// Load lexicon
187-
lexicon_ = std::make_unique<Lexicon>(mode_config_.lexicon, mode_config_.tokens, mode_config_.tagger,
188-
mode_config_.verbalizer);
189-
// Read g.bin
186+
187+
if (!std::filesystem::exists(mode_config_.tagger) || !std::filesystem::exists(mode_config_.verbalizer)) {
188+
SLOGW("Either tagger or verbalizer file does not exist, using alternative lexicon.");
189+
lexicon_ = std::make_unique<Lexicon>(mode_config_.lexicon, mode_config_.tokens);
190+
} else {
191+
lexicon_ = std::make_unique<Lexicon>(mode_config_.lexicon, mode_config_.tokens, mode_config_.tagger,
192+
mode_config_.verbalizer);
193+
}
190194
g_matrix.resize(256, 0);
191195
FILE *fp = fopen(mode_config_.gbin.c_str(), "rb");
192196
if (!fp) {

projects/llm_framework/main_melotts/src/runner/Lexicon.hpp

Lines changed: 70 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ class Lexicon {
3030
std::pair<std::vector<int>, std::vector<int>> unknown_token;
3131
std::unordered_map<int, std::string> reverse_tokens;
3232

33-
wetext::Processor* m_processor;
33+
wetext::Processor* m_processor = nullptr;
3434

3535
public:
3636
Lexicon(const std::string& lexicon_filename, const std::string& tokens_filename, const std::string& tagger_filename,
@@ -96,6 +96,65 @@ class Lexicon {
9696
max_phrase_length);
9797
}
9898

99+
Lexicon(const std::string& lexicon_filename, const std::string& tokens_filename) : max_phrase_length(0)
100+
{
101+
SLOGD("Dictionary loading: %s Pronunciation table loading: %s", tokens_filename.c_str(),
102+
lexicon_filename.c_str());
103+
104+
std::unordered_map<std::string, int> tokens;
105+
std::ifstream ifs(tokens_filename);
106+
assert(ifs.is_open());
107+
std::string line;
108+
while (std::getline(ifs, line)) {
109+
auto splitted_line = split(line, ' ');
110+
if (splitted_line.size() >= 2) {
111+
int token_id = std::stoi(splitted_line[1]);
112+
tokens.insert({splitted_line[0], token_id});
113+
reverse_tokens[token_id] = splitted_line[0];
114+
}
115+
}
116+
ifs.close();
117+
ifs.open(lexicon_filename);
118+
assert(ifs.is_open());
119+
while (std::getline(ifs, line)) {
120+
auto splitted_line = split(line, ' ');
121+
if (splitted_line.empty()) continue;
122+
std::string word_or_phrase = splitted_line[0];
123+
auto chars = splitEachChar(word_or_phrase);
124+
max_phrase_length = std::max(max_phrase_length, chars.size());
125+
size_t phone_tone_len = splitted_line.size() - 1;
126+
size_t half_len = phone_tone_len / 2;
127+
std::vector<int> phones, tones;
128+
for (size_t i = 0; i < phone_tone_len; i++) {
129+
auto phone_or_tone = splitted_line[i + 1];
130+
if (i < half_len) {
131+
if (tokens.find(phone_or_tone) != tokens.end()) {
132+
phones.push_back(tokens[phone_or_tone]);
133+
}
134+
} else {
135+
tones.push_back(std::stoi(phone_or_tone));
136+
}
137+
}
138+
lexicon[word_or_phrase] = std::make_pair(phones, tones);
139+
}
140+
const std::vector<std::string> punctuation{"!", "?", "", ",", ".", "'", "-"};
141+
for (const auto& p : punctuation) {
142+
if (tokens.find(p) != tokens.end()) {
143+
int i = tokens[p];
144+
lexicon[p] = std::make_pair(std::vector<int>{i}, std::vector<int>{0});
145+
}
146+
}
147+
assert(tokens.find("_") != tokens.end());
148+
unknown_token = std::make_pair(std::vector<int>{tokens["_"]}, std::vector<int>{0});
149+
lexicon[" "] = unknown_token;
150+
lexicon[""] = lexicon[","];
151+
lexicon[""] = lexicon["."];
152+
lexicon[""] = lexicon["!"];
153+
lexicon[""] = lexicon["?"];
154+
SLOGD("Dictionary loading complete, containing %zu entries, longest phrase length: %zu", lexicon.size(),
155+
max_phrase_length);
156+
}
157+
99158
std::vector<std::string> splitEachChar(const std::string& text)
100159
{
101160
std::vector<std::string> words;
@@ -195,14 +254,17 @@ class Lexicon {
195254
{
196255
SLOGD("\nStarting text processing: \"%s\"", text.c_str());
197256

198-
std::string taggedText = m_processor->Tag(text);
199-
SLOGD("\taggedText processing: \"%s\"", taggedText.c_str());
200-
std::string normalizedText = m_processor->Verbalize(taggedText);
201-
SLOGD("\normalizedText processing: \"%s\"", normalizedText.c_str());
257+
std::string normalizedText;
258+
if (m_processor) {
259+
std::string taggedText = m_processor->Tag(text);
260+
SLOGD("\taggedText processing: \"%s\"", taggedText.c_str());
261+
normalizedText = m_processor->Verbalize(taggedText);
262+
SLOGD("\tnormalizedText processing: \"%s\"", normalizedText.c_str());
263+
} else {
264+
SLOGD("m_processor is not initialized, skipping tag and verbalize steps.");
265+
normalizedText = text;
266+
}
202267

203-
SLOGD("=======Matching Results=======");
204-
SLOGD("Unit\t|\tPhonemes\t|\tTones");
205-
SLOGD("-----------------------------");
206268
phones.insert(phones.end(), unknown_token.first.begin(), unknown_token.first.end());
207269
tones.insert(tones.end(), unknown_token.second.begin(), unknown_token.second.end());
208270
SLOGD("<BOS>\t|\t%s\t|\t%s", phonesToString(unknown_token.first).c_str(),

0 commit comments

Comments
 (0)