-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathDecoding.cpp
More file actions
35 lines (30 loc) · 1.1 KB
/
Decoding.cpp
File metadata and controls
35 lines (30 loc) · 1.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#include <iostream>
#include <vector>
#include <unordered_map>
#include <string>
// Demo vocabulary: maps a token id to the text piece it stands for.
// "[CLS]", "[SEP]" and "[PAD]" are the markers decodeTokens() drops from its
// output; an entry starting with "##" is a piece that decodeTokens() attaches
// to the previous output without a separating space.
std::unordered_map<int, std::string> vocab{
    {101, "[CLS]"},
    {102, "[SEP]"},
    {0, "[PAD]"},
    {7592, "Hello"},
    {2088, "world"},
    {2003, "is"},
    {1037, "a"},
    {2146, "test"},
    {999, "!"},
    {103, "##ing"},
};
/// @brief Turns a sequence of token ids into a single text string using `vocab`.
///
/// Rules, applied to each id in order:
///  - ids that are not in `vocab` are skipped;
///  - the pieces "[CLS]", "[SEP]" and "[PAD]" are skipped;
///  - a piece starting with "##" is appended to the existing output without
///    the "##" and without a space;
///  - any other piece is appended, preceded by one space unless the output is
///    still empty.
/// A "##" piece that arrives while the output is still empty is emitted
/// verbatim, marker included.
///
/// @param token_ids Token ids to decode, in order.
/// @return The decoded text; empty if no piece was emitted.
std::string decodeTokens(const std::vector<int>& token_ids) {
    std::string decoded_text;
    for (const int token_id : token_ids) {
        // One hash lookup per token; find() never inserts, unlike operator[].
        const auto entry = vocab.find(token_id);
        if (entry == vocab.end()) continue;

        // Bind by reference: no per-token copy of the mapped string.
        const std::string& word = entry->second;
        if (word == "[CLS]" || word == "[SEP]" || word == "[PAD]") continue;

        // compare() checks the two-character prefix without allocating a substring.
        const bool is_continuation = word.compare(0, 2, "##") == 0;
        if (!decoded_text.empty() && is_continuation) {
            // Append everything after the "##" marker directly.
            decoded_text.append(word, 2, std::string::npos);
        } else {
            if (!decoded_text.empty()) decoded_text += ' ';
            decoded_text += word;
        }
    }
    return decoded_text;
}
// Demo entry point: decodes one fixed token-id sequence and prints the result
// to standard output.
int main() {
    const std::vector<int> sample_ids{101, 7592, 2088, 2003, 1037, 2146, 102, 999};
    std::cout << "Decoded text: " << decodeTokens(sample_ids) << std::endl;
    return 0;
}