Skip to content

Commit e479b19

Browse files
authored
Merge pull request #18 from yuyun2000/dev
Add Japanese and trilingual text normalization for numbers and symbols
2 parents 7d392a3 + 0619178 commit e479b19

File tree

245 files changed

+66928
-134
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

245 files changed

+66928
-134
lines changed

projects/llm_framework/include/fst/accumulator.h

Lines changed: 903 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
// See www.openfst.org for extensive documentation on this weighted
2+
// finite-state transducer library.
3+
//
4+
// FST implementation class to attach an arbitrary object with a read/write
5+
// method to an FST and its file representation. The FST is given a new type
6+
// name.
7+
8+
#ifndef FST_ADD_ON_H_
9+
#define FST_ADD_ON_H_
10+
11+
#include <stddef.h>
12+
#include <memory>
13+
#include <string>
14+
#include <utility>
15+
16+
#include <fst/log.h>
17+
18+
#include <fst/fst.h>
19+
20+
21+
namespace fst {
22+
23+
// Identifies stream data as an add-on FST.
24+
// Magic number that internal::AddOnImpl::Write emits right after the outer
// FST header and that internal::AddOnImpl::Read verifies before it reads the
// contained FST; a mismatch makes Read fail with a "Bad add-on header" error.
static constexpr int32 kAddOnMagicNumber = 446681434;
25+
26+
// Nothing to save.
27+
class NullAddOn {
28+
public:
29+
NullAddOn() {}
30+
31+
static NullAddOn *Read(std::istream &strm, const FstReadOptions &opts) {
32+
return new NullAddOn();
33+
}
34+
35+
bool Write(std::ostream &ostrm, const FstWriteOptions &opts) const {
36+
return true;
37+
}
38+
};
39+
40+
// Create a new add-on from a pair of add-ons.
41+
template <class A1, class A2>
42+
class AddOnPair {
43+
public:
44+
// Argument reference count incremented.
45+
AddOnPair(std::shared_ptr<A1> a1, std::shared_ptr<A2> a2)
46+
: a1_(std::move(a1)), a2_(std::move(a2)) {}
47+
48+
const A1 *First() const { return a1_.get(); }
49+
50+
const A2 *Second() const { return a2_.get(); }
51+
52+
std::shared_ptr<A1> SharedFirst() const { return a1_; }
53+
54+
std::shared_ptr<A2> SharedSecond() const { return a2_; }
55+
56+
static AddOnPair<A1, A2> *Read(std::istream &istrm,
57+
const FstReadOptions &opts) {
58+
A1 *a1 = nullptr;
59+
bool have_addon1 = false;
60+
ReadType(istrm, &have_addon1);
61+
if (have_addon1) a1 = A1::Read(istrm, opts);
62+
63+
A2 *a2 = nullptr;
64+
bool have_addon2 = false;
65+
ReadType(istrm, &have_addon2);
66+
if (have_addon2) a2 = A2::Read(istrm, opts);
67+
68+
return new AddOnPair<A1, A2>(std::shared_ptr<A1>(a1),
69+
std::shared_ptr<A2>(a2));
70+
}
71+
72+
bool Write(std::ostream &ostrm, const FstWriteOptions &opts) const {
73+
bool have_addon1 = a1_ != nullptr;
74+
WriteType(ostrm, have_addon1);
75+
if (have_addon1) a1_->Write(ostrm, opts);
76+
bool have_addon2 = a2_ != nullptr;
77+
WriteType(ostrm, have_addon2);
78+
if (have_addon2) a2_->Write(ostrm, opts);
79+
return true;
80+
}
81+
82+
private:
83+
std::shared_ptr<A1> a1_;
84+
std::shared_ptr<A2> a2_;
85+
};
86+
87+
namespace internal {
88+
89+
// Adds an object of type T to an FST. T must support:
90+
//
91+
// T* Read(std::istream &);
92+
// bool Write(std::ostream &);
93+
//
94+
// The resulting type is a new FST implementation.
95+
template <class FST, class T>
96+
class AddOnImpl : public FstImpl<typename FST::Arc> {
97+
public:
98+
using Arc = typename FST::Arc;
99+
using Label = typename Arc::Label;
100+
using StateId = typename Arc::StateId;
101+
using Weight = typename Arc::Weight;
102+
103+
using FstImpl<Arc>::SetType;
104+
using FstImpl<Arc>::SetInputSymbols;
105+
using FstImpl<Arc>::SetOutputSymbols;
106+
using FstImpl<Arc>::SetProperties;
107+
using FstImpl<Arc>::WriteHeader;
108+
109+
// We make a thread-safe copy of the FST by default since an FST
110+
// implementation is expected to not share mutable data between objects.
111+
AddOnImpl(const FST &fst, const string &type,
112+
std::shared_ptr<T> t = std::shared_ptr<T>())
113+
: fst_(fst, true), t_(std::move(t)) {
114+
SetType(type);
115+
SetProperties(fst_.Properties(kFstProperties, false));
116+
SetInputSymbols(fst_.InputSymbols());
117+
SetOutputSymbols(fst_.OutputSymbols());
118+
}
119+
120+
// Conversion from const Fst<Arc> & to F always copies the underlying
121+
// implementation.
122+
AddOnImpl(const Fst<Arc> &fst, const string &type,
123+
std::shared_ptr<T> t = std::shared_ptr<T>())
124+
: fst_(fst), t_(std::move(t)) {
125+
SetType(type);
126+
SetProperties(fst_.Properties(kFstProperties, false));
127+
SetInputSymbols(fst_.InputSymbols());
128+
SetOutputSymbols(fst_.OutputSymbols());
129+
}
130+
131+
// We make a thread-safe copy of the FST by default since an FST
132+
// implementation is expected to not share mutable data between objects.
133+
AddOnImpl(const AddOnImpl<FST, T> &impl)
134+
: fst_(impl.fst_, true), t_(impl.t_) {
135+
SetType(impl.Type());
136+
SetProperties(fst_.Properties(kCopyProperties, false));
137+
SetInputSymbols(fst_.InputSymbols());
138+
SetOutputSymbols(fst_.OutputSymbols());
139+
}
140+
141+
StateId Start() const { return fst_.Start(); }
142+
143+
Weight Final(StateId s) const { return fst_.Final(s); }
144+
145+
size_t NumArcs(StateId s) const { return fst_.NumArcs(s); }
146+
147+
size_t NumInputEpsilons(StateId s) const { return fst_.NumInputEpsilons(s); }
148+
149+
size_t NumOutputEpsilons(StateId s) const {
150+
return fst_.NumOutputEpsilons(s);
151+
}
152+
153+
size_t NumStates() const { return fst_.NumStates(); }
154+
155+
static AddOnImpl<FST, T> *Read(std::istream &strm,
156+
const FstReadOptions &opts) {
157+
FstReadOptions nopts(opts);
158+
FstHeader hdr;
159+
if (!nopts.header) {
160+
hdr.Read(strm, nopts.source);
161+
nopts.header = &hdr;
162+
}
163+
std::unique_ptr<AddOnImpl<FST, T>> impl(
164+
new AddOnImpl<FST, T>(nopts.header->FstType()));
165+
if (!impl->ReadHeader(strm, nopts, kMinFileVersion, &hdr)) return nullptr;
166+
impl.reset();
167+
int32 magic_number = 0;
168+
ReadType(strm, &magic_number); // Ensures this is an add-on FST.
169+
if (magic_number != kAddOnMagicNumber) {
170+
LOG(ERROR) << "AddOnImpl::Read: Bad add-on header: " << nopts.source;
171+
return nullptr;
172+
}
173+
FstReadOptions fopts(opts);
174+
fopts.header = nullptr; // Contained header was written out.
175+
std::unique_ptr<FST> fst(FST::Read(strm, fopts));
176+
if (!fst) return nullptr;
177+
std::shared_ptr<T> t;
178+
bool have_addon = false;
179+
ReadType(strm, &have_addon);
180+
if (have_addon) { // Reads add-on object if present.
181+
t = std::shared_ptr<T>(T::Read(strm, fopts));
182+
if (!t) return nullptr;
183+
}
184+
return new AddOnImpl<FST, T>(*fst, nopts.header->FstType(), t);
185+
}
186+
187+
bool Write(std::ostream &strm, const FstWriteOptions &opts) const {
188+
FstHeader hdr;
189+
FstWriteOptions nopts(opts);
190+
nopts.write_isymbols = false; // Allows contained FST to hold any symbols.
191+
nopts.write_osymbols = false;
192+
WriteHeader(strm, nopts, kFileVersion, &hdr);
193+
WriteType(strm, kAddOnMagicNumber); // Ensures this is an add-on FST.
194+
FstWriteOptions fopts(opts);
195+
fopts.write_header = true; // Forces writing contained header.
196+
if (!fst_.Write(strm, fopts)) return false;
197+
bool have_addon = !!t_;
198+
WriteType(strm, have_addon);
199+
// Writes add-on object if present.
200+
if (have_addon) t_->Write(strm, opts);
201+
return true;
202+
}
203+
204+
void InitStateIterator(StateIteratorData<Arc> *data) const {
205+
fst_.InitStateIterator(data);
206+
}
207+
208+
void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
209+
fst_.InitArcIterator(s, data);
210+
}
211+
212+
FST &GetFst() { return fst_; }
213+
214+
const FST &GetFst() const { return fst_; }
215+
216+
const T *GetAddOn() const { return t_.get(); }
217+
218+
std::shared_ptr<T> GetSharedAddOn() const { return t_; }
219+
220+
void SetAddOn(std::shared_ptr<T> t) { t_ = t; }
221+
222+
private:
223+
explicit AddOnImpl(const string &type) : t_() {
224+
SetType(type);
225+
SetProperties(kExpanded);
226+
}
227+
228+
// Current file format version.
229+
static constexpr int kFileVersion = 1;
230+
// Minimum file format version supported.
231+
static constexpr int kMinFileVersion = 1;
232+
233+
FST fst_;
234+
std::shared_ptr<T> t_;
235+
236+
AddOnImpl &operator=(const AddOnImpl &) = delete;
237+
};
238+
239+
template <class FST, class T>
240+
constexpr int AddOnImpl<FST, T>::kFileVersion;
241+
242+
template <class FST, class T>
243+
constexpr int AddOnImpl<FST, T>::kMinFileVersion;
244+
245+
} // namespace internal
246+
} // namespace fst
247+
248+
#endif // FST_ADD_ON_H_

0 commit comments

Comments
 (0)