-
Notifications
You must be signed in to change notification settings - Fork 22
Expand file tree
/
Copy pathTextTokenizer.h
More file actions
34 lines (22 loc) · 830 Bytes
/
TextTokenizer.h
File metadata and controls
34 lines (22 loc) · 830 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#pragma once
#include <vector>
#include <string>
#include "VoxCommon.hpp"
#include "Numbertext.hxx"
/// Turns an input string into a list of string tokens (see Tokenize).
///
/// NOTE(review): only declarations are visible in this header. Behavioural
/// notes below that go beyond the signatures are assumptions inferred from
/// names and the pre-existing comment; confirm them against the
/// implementation file.
class TextTokenizer
{
private:
    /// Character set configured through SetAllowedChars.
    /// Presumably the UTF-32 form of the string passed in — TODO confirm.
    std::u32string AllowedChars;

    /// Takes the space-separated tokens and returns a new token list.
    /// Presumably numeric tokens are replaced by their spelled-out form
    /// using CuNumber / NumLang — verify against the implementation.
    std::vector<std::string> ExpandNumbers(const std::vector<std::string>& SpaceTokens);

    /// Number-to-text converter, expected to be supplied via SetNumberText.
    /// Defaults to nullptr so the pointer has a well-defined value (and can
    /// be null-checked) before SetNumberText has been called.
    /// NOTE(review): looks non-owning, since SetNumberText takes a reference
    /// — confirm the destructor does not delete it.
    Numbertext* CuNumber = nullptr;

    /// Language identifier supplied alongside the converter in SetNumberText.
    std::string NumLang;

    // Go through the string and add spaces before and after punctuation.
    // This is because ExpandNumbers won't recognize numbers if they've got punctuation like 500, or .9000
    std::string SpaceChars(const std::string& InStr);

public:
    TextTokenizer();
    ~TextTokenizer();

    /// Provides the number-to-text converter and the language to use with it.
    /// @param INum Converter instance; presumably must outlive this tokenizer
    ///             if only its address is stored — TODO confirm.
    /// @param Lang Language identifier used for number conversion.
    void SetNumberText(Numbertext& INum,const std::string& Lang);

    /// Tokenizes the given text.
    /// @param InTxt        Text to tokenize.
    /// @param IsTacotron   Optional flag, defaults to false; presumably selects
    ///                     Tacotron-specific handling — effect not visible here.
    /// @param IsTorchMoji  Optional flag, defaults to false; presumably selects
    ///                     TorchMoji-specific handling — effect not visible here.
    /// @return The resulting tokens as strings.
    std::vector<std::string> Tokenize(const std::string& InTxt, bool IsTacotron = false, bool IsTorchMoji = false);

    /// Sets the allowed character set from the given string.
    /// NOTE(review): how disallowed characters are treated is defined in the
    /// implementation file, not here.
    void SetAllowedChars(const std::string &value);
};