Skip to content

Commit 724fc93

Browse files
authored
Improve the memory of the Wasm inference engine (#1135)
* Add a clear method to AlignedMemory, since it is only used to transfer memory from the Wasm interface into the rest of the code; without it, that memory is retained for the lifetime of the application.
* Apply the tensor allocator size changes to Wasm only.
* Update the build-wasm script to use proper loggers and add a --fast flag.
* Rework marian::io::binary::loadItems to avoid unnecessary copies.
* Add a shared pointer for the items.
* Address review comments.
1 parent 2fe6b2b commit 724fc93

File tree

16 files changed

+221
-95
lines changed

16 files changed

+221
-95
lines changed

inference/marian-fork/src/common/binary.cpp

Lines changed: 51 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -28,34 +28,50 @@ const T* get(const void*& current, uint64_t num = 1) {
2828
}
2929

3030
void loadItems(const void* current, std::vector<io::Item>& items, bool mapped) {
31+
uint64_t totalBytesLoaded = 0; // Track total bytes loaded
32+
3133
uint64_t binaryFileVersion = *get<uint64_t>(current);
3234
ABORT_IF(binaryFileVersion != BINARY_FILE_VERSION,
3335
"Binary file versions do not match: {} (file) != {} (expected)",
3436
binaryFileVersion,
3537
BINARY_FILE_VERSION);
3638

37-
uint64_t numHeaders = *get<uint64_t>(current); // number of item headers that follow
38-
const Header* headers = get<Header>(current, numHeaders); // read that many headers
39+
totalBytesLoaded += sizeof(uint64_t); // Account for binaryFileVersion
40+
41+
uint64_t numHeaders = *get<uint64_t>(current); // number of item headers that follow
42+
totalBytesLoaded += sizeof(uint64_t); // Account for numHeaders
43+
44+
const Header* headers = get<Header>(current, numHeaders); // read that many headers
45+
totalBytesLoaded += sizeof(Header) * numHeaders; // Account for headers
46+
47+
if(items.size() == numHeaders) {
48+
// These items are already loaded.
49+
return;
50+
}
3951

4052
// prepopulate items with meta data from headers
4153
items.resize(numHeaders);
4254
for(int i = 0; i < numHeaders; ++i) {
4355
items[i].type = (Type)headers[i].type;
4456
items[i].name = get<char>(current, headers[i].nameLength);
57+
totalBytesLoaded += headers[i].nameLength; // Account for item name bytes
4558
items[i].mapped = mapped;
4659
}
4760

4861
// read in actual shape and data
4962
for(int i = 0; i < numHeaders; ++i) {
5063
uint64_t len = headers[i].shapeLength;
51-
items[i].shape.resize(len);
52-
const int* arr = get<int>(current, len); // read shape
53-
std::copy(arr, arr + len, items[i].shape.begin()); // copy to Item::shape
64+
items[i].shape.resize(len);
65+
const int* arr = get<int>(current, len); // read shape
66+
totalBytesLoaded += len * sizeof(int); // Account for shape bytes
67+
std::copy(arr, arr + len, items[i].shape.begin()); // copy to Item::shape
5468
}
5569

5670
// move by offset bytes, aligned to 256-bytes boundary
5771
uint64_t offset = *get<uint64_t>(current);
72+
totalBytesLoaded += sizeof(uint64_t); // Account for offset metadata
5873
get<char>(current, offset);
74+
totalBytesLoaded += offset; // Account for offset bytes
5975

6076
for(int i = 0; i < numHeaders; ++i) {
6177
//if(items[i].mapped && !isIntgemm(items[i].type)) { // memory-mapped, hence only set pointer. At the moment it intgemm matrices can't be used without processing
@@ -65,29 +81,44 @@ void loadItems(const void* current, std::vector<io::Item>& items, bool mapped) {
6581
// If this is not set, we trigger node_initializers.cpp:186. This one just assigns the memory ptr to the tensor if set to true, but at the moment
6682
// We are preparing some things on demand (the bottom portion of this code). Once we stop doing that, we can use the full mmap codepath
6783
// Also when using the full mmap codepath, we need to uncomment expression_graph.h:582
68-
uint64_t len = headers[i].dataLength;
69-
items[i].bytes.resize(len);
70-
const char* ptr = get<char>(current, len);
71-
if (matchType<intgemm8>(items[i].type)) {
72-
if (items[i].name.find("Wemb") != std::string::npos) { // Since Wemb need to be dequantised, we have a special case for them
84+
85+
auto resize = [&](uint64_t len) {
86+
items[i].bytes->resize(len);
87+
totalBytesLoaded += len;
88+
};
89+
const char* ptr = get<char>(current, headers[i].dataLength);
90+
91+
if(matchType<intgemm8>(items[i].type)) {
92+
if(items[i].name.find("Wemb") != std::string::npos) { // Since Wemb need to be dequantised,
93+
// we have a special case for them
7394
items[i].type = Type::float32;
74-
items[i].bytes.resize(items[i].shape.elements()*sizeof(float)); // We should have an extra float at the back but that requires a different format, due to allocator work
95+
resize(items[i].shape.elements()
96+
* sizeof(float)); // We should have an extra float at the back but that requires a
97+
// different format, due to allocator work
7598
cpu::integer::unquantizeWemb<Type::int8>(items[i], ptr);
7699
} else {
100+
resize(headers[i].dataLength);
77101
cpu::integer::prepareAndTransposeB<Type::int8>(items[i], ptr);
78102
}
79-
} else if (matchType<intgemm16>(items[i].type)) {
80-
if (items[i].name.find("Wemb") != std::string::npos) { // Since Wemb need to be dequantised, we have a special case for them
103+
} else if(matchType<intgemm16>(items[i].type)) {
104+
if(items[i].name.find("Wemb") != std::string::npos) { // Since Wemb need to be dequantised,
105+
// we have a special case for them
81106
items[i].type = Type::float32;
82-
items[i].bytes.resize(items[i].shape.elements()*sizeof(float)); // We should have an extra float at the back but that requires a different format, due to allocator work
107+
resize(items[i].shape.elements()
108+
* sizeof(float)); // We should have an extra float at the back but that requires a
109+
// different format, due to allocator work
83110
cpu::integer::unquantizeWemb<Type::int16>(items[i], ptr);
84111
} else {
112+
resize(headers[i].dataLength);
85113
cpu::integer::prepareAndTransposeB<Type::int16>(items[i], ptr);
86114
}
87115
} else {
88-
std::copy(ptr, ptr + len, items[i].bytes.begin());
116+
resize(headers[i].dataLength);
117+
std::copy(ptr, ptr + headers[i].dataLength, items[i].bytes->begin());
89118
}
90119
}
120+
121+
LOG(info, "[memory] Model data loaded in: {}", totalBytesLoaded);
91122
}
92123

93124
void loadItems(const std::string& fileName, std::vector<io::Item>& items) {
@@ -145,7 +176,7 @@ void saveItems(const std::string& fileName,
145176
headers.push_back(Header{item.name.size() + 1,
146177
(uint64_t)item.type,
147178
item.shape.size(),
148-
item.bytes.size()}); // binary item size with padding, will be 256-byte-aligned
179+
item.bytes->size()}); // binary item size with padding, will be 256-byte-aligned
149180
}
150181

151182
uint64_t headerSize = headers.size();
@@ -173,10 +204,10 @@ void saveItems(const std::string& fileName,
173204

174205
// Write out all values
175206
for(const auto& item : items)
176-
pos += out.write(item.data(), item.bytes.size()); // writes out data with padding, keeps 256-byte boundary.
177-
// Amazingly this is binary-compatible with V1 and aligned and
178-
// non-aligned models can be read with the same procedure.
179-
// No version-bump required. Gets 5-8% of speed back when mmapped.
207+
pos += out.write(item.data(), item.bytes->size()); // writes out data with padding, keeps 256-byte boundary.
208+
// Amazingly this is binary-compatible with V1 and aligned and
209+
// non-aligned models can be read with the same procedure.
210+
// No version-bump required. Gets 5-8% of speed back when mmapped.
180211
}
181212

182213
} // namespace binary

inference/marian-fork/src/common/io.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,10 @@ void addMetaToItems(const std::string& meta,
7777
// increase size by 1 to add \0
7878
item.shape = Shape({(int)meta.size() + 1});
7979

80-
item.bytes.resize(item.shape.elements());
81-
std::copy(meta.begin(), meta.end(), item.bytes.begin());
80+
item.bytes->resize(item.shape.elements());
81+
std::copy(meta.begin(), meta.end(), item.bytes->begin());
8282
// set string terminator
83-
item.bytes.back() = '\0';
83+
item.bytes->back() = '\0';
8484

8585
item.type = Type::int8;
8686

@@ -104,12 +104,14 @@ void loadItemsFromNpz(const std::string& fileName, std::vector<Item>& items) {
104104
Item item;
105105
item.name = it.first;
106106
item.shape = shape;
107-
item.bytes.swap(it.second->bytes);
107+
item.bytes = std::make_shared<std::vector<char>>(std::move(it.second->bytes));
108+
108109
items.emplace_back(std::move(item));
109110
}
110111
}
111112

112113
std::vector<Item> loadItems(const std::string& fileName) {
114+
LOG(info, "Inside of loadItems from fileName");
113115
std::vector<Item> items;
114116
if(isNpz(fileName)) {
115117
loadItemsFromNpz(fileName, items);
@@ -155,7 +157,7 @@ void saveItemsNpz(const std::string& fileName, const std::vector<Item>& items) {
155157
else
156158
ABORT("Other types not supported yet");
157159

158-
npzItems.emplace_back(item.name, item.bytes, shape, type, sizeOf(item.type));
160+
npzItems.emplace_back(item.name, *item.bytes, shape, type, sizeOf(item.type));
159161
}
160162
cnpy::npz_save(fileName, npzItems);
161163
}

inference/marian-fork/src/common/io_item.h

Lines changed: 63 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,72 @@ namespace marian {
99
namespace io {
1010

1111
struct Item {
12-
std::vector<char> bytes;
12+
std::shared_ptr<std::vector<char>> bytes;
1313
const char* ptr{0};
1414
bool mapped{false};
1515

1616
std::string name;
1717
Shape shape;
1818
Type type{Type::float32};
1919

20+
// Default constructor
21+
Item() : bytes(std::make_shared<std::vector<char>>()) {}
22+
23+
// Copy constructor
24+
Item(const Item& other)
25+
: bytes(other.bytes),
26+
ptr(other.ptr),
27+
mapped(other.mapped),
28+
name(other.name),
29+
shape(other.shape),
30+
type(other.type) {}
31+
32+
// Copy assignment operator
33+
Item& operator=(const Item& other) {
34+
if(this != &other) {
35+
bytes = other.bytes;
36+
ptr = other.ptr;
37+
mapped = other.mapped;
38+
name = other.name;
39+
shape = other.shape;
40+
type = other.type;
41+
}
42+
return *this;
43+
}
44+
45+
// Move constructor
46+
Item(Item&& other) noexcept
47+
: bytes(std::move(other.bytes)),
48+
ptr(other.ptr),
49+
mapped(other.mapped),
50+
name(std::move(other.name)),
51+
shape(std::move(other.shape)),
52+
type(other.type) {
53+
other.ptr = nullptr;
54+
other.mapped = false;
55+
}
56+
57+
// Move assignment operator
58+
Item& operator=(Item&& other) noexcept {
59+
if(this != &other) {
60+
bytes = std::move(other.bytes);
61+
ptr = other.ptr;
62+
mapped = other.mapped;
63+
name = std::move(other.name);
64+
shape = std::move(other.shape);
65+
type = other.type;
66+
67+
other.ptr = nullptr;
68+
other.mapped = false;
69+
}
70+
return *this;
71+
}
72+
2073
const char* data() const {
2174
if(mapped)
2275
return ptr;
2376
else
24-
return bytes.data();
77+
return bytes->data();
2578
}
2679

2780
size_t size() const { // @TODO: review this again for 256-bytes boundary alignment
@@ -40,30 +93,30 @@ struct Item {
4093

4194
// cut to size (get rid of padding if any) to make append operation work correctly
4295
size_t bytesWithoutPadding = shape.elements() * sizeOf(type);
43-
bytes.resize(bytesWithoutPadding);
96+
bytes->resize(bytesWithoutPadding);
4497

4598
shape.set(-1, shape.elements() + other.shape.elements());
4699

47100
size_t addbytesWithoutPadding = other.shape.elements() * sizeOf(other.type); // ignore padding if any
48-
bytes.insert(bytes.end(), other.bytes.begin(), other.bytes.begin() + addbytesWithoutPadding);
101+
bytes->insert(bytes->end(), other.bytes->begin(), other.bytes->begin() + addbytesWithoutPadding);
49102

50103
// grow to align to 256 bytes boundary (will be undone when more pieces are appended)
51-
size_t multiplier = (size_t)ceil((float)bytes.size() / (float)256);
52-
bytes.resize(multiplier * 256);
104+
size_t multiplier = (size_t)ceil((float)bytes->size() / (float)256);
105+
bytes->resize(multiplier * 256);
53106
}
54107

55108
template <typename From, typename To>
56109
void convertFromTo() {
57110
size_t elements = size() / sizeof(From);
58111
size_t newSize = elements * sizeof(To);
59-
std::vector<char> newBytes(newSize);
112+
auto newBytes = std::make_shared<std::vector<char>>(newSize);
60113

61-
From* in = (From*)bytes.data();
62-
To* out = (To*)newBytes.data();
114+
From* in = (From*)bytes->data();
115+
To* out = (To*)newBytes->data();
63116
for(int i = 0; i < elements; ++i)
64117
out[i] = (To)in[i];
65118

66-
bytes.swap(newBytes);
119+
bytes->swap(*newBytes);
67120
}
68121

69122
template <typename T>

inference/marian-fork/src/graph/node_initializers.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ class LambdaInitConvert : public NodeInitializer {
2727
private:
2828
std::function<void(Tensor)> lambda_;
2929
Type intermediateType_; // is used for the creation of a temporary intermedia tensor on which the lambda actually operates.
30-
// This tensor is then automatically cast and copied to the type of the actual tensor.
30+
// This tensor is then automatically cast and copied to the type of the actual tensor.
3131

3232
public:
3333
LambdaInitConvert(std::function<void(Tensor)>&& lambda,
@@ -253,7 +253,6 @@ template Ptr<NodeInitializer> range<IndexType>(IndexType begin, IndexType end, I
253253
} // namespace inits
254254
} // namespace marian
255255

256-
257256
#if BLAS_FOUND && !WASM_COMPATIBLE_SOURCE
258257
#include "faiss/VectorTransform.h"
259258

inference/marian-fork/src/models/amun.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ class Amun : public EncoderDecoder {
180180
ioItems.emplace_back();
181181
ioItems.back().name = "decoder_c_tt";
182182
ioItems.back().shape = Shape({1, 0});
183-
ioItems.back().bytes.emplace_back((char)0);
183+
ioItems.back().bytes->emplace_back((char)0);
184184

185185
io::addMetaToItems(getModelParametersAsString(), "special:model.yml", ioItems);
186186
io::saveItems(name, ioItems);

inference/marian-fork/src/models/nematus.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ class Nematus : public EncoderDecoder {
7878
ioItems.emplace_back();
7979
ioItems.back().name = "decoder_c_tt";
8080
ioItems.back().shape = Shape({1, 0});
81-
ioItems.back().bytes.emplace_back((char)0);
81+
ioItems.back().bytes->emplace_back((char)0);
8282

8383
io::addMetaToItems(getModelParametersAsString(), "special:model.yml", ioItems);
8484
io::saveItems(name, ioItems);

inference/marian-fork/src/optimizers/optimizers.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ void Adagrad::save(const std::string& name,
108108
item.name = "adagrad_gt";
109109
item.shape = Shape({1, (int)vGt.size()});
110110
item.type = Type::float32;
111-
item.bytes.resize(vGt.size() * sizeOf(item.type));
112-
std::copy((char*)vGt.data(), (char*)(vGt.data() + vGt.size()), item.bytes.begin());
111+
item.bytes->resize(vGt.size() * sizeOf(item.type));
112+
std::copy((char*)vGt.data(), (char*)(vGt.data() + vGt.size()), item.bytes->begin());
113113

114114
io::saveItems(name, {item});
115115
}
@@ -277,27 +277,27 @@ void Adam::save(const std::string& name,
277277
itemMt.name = "adam_mt";
278278
itemMt.shape = Shape({1, (int)vMt.size()});
279279
itemMt.type = Type::float32;
280-
itemMt.bytes.resize(vMt.size() * sizeOf(itemMt.type));
280+
itemMt.bytes->resize(vMt.size() * sizeOf(itemMt.type));
281281
std::copy(
282-
(char*)vMt.data(), (char*)(vMt.data() + vMt.size()), itemMt.bytes.begin());
282+
(char*)vMt.data(), (char*)(vMt.data() + vMt.size()), itemMt.bytes->begin());
283283

284284
io::Item itemVt;
285285
itemVt.name = "adam_vt";
286286
itemVt.shape = Shape({1, (int)vVt.size()});
287287
itemVt.type = Type::float32;
288-
itemVt.bytes.resize(vVt.size() * sizeOf(itemVt.type));
288+
itemVt.bytes->resize(vVt.size() * sizeOf(itemVt.type));
289289
std::copy(
290-
(char*)vVt.data(), (char*)(vVt.data() + vVt.size()), itemVt.bytes.begin());
290+
(char*)vVt.data(), (char*)(vVt.data() + vVt.size()), itemVt.bytes->begin());
291291

292292
// @TODO: this pattern is duplicated several times; refactor it
293293
std::array<double, 2> vDenoms{denom1_, denom2_};
294294
io::Item itemDenoms;
295295
itemDenoms.name = "adam_denoms";
296296
itemDenoms.shape = Shape({1, (int)vDenoms.size()});
297297
itemDenoms.type = Type::float64;
298-
itemDenoms.bytes.resize(vDenoms.size() * sizeOf(itemDenoms.type));
298+
itemDenoms.bytes->resize(vDenoms.size() * sizeOf(itemDenoms.type));
299299
std::copy(
300-
(char*)vDenoms.data(), (char*)(vDenoms.data() + vDenoms.size()), itemDenoms.bytes.begin());
300+
(char*)vDenoms.data(), (char*)(vDenoms.data() + vDenoms.size()), itemDenoms.bytes->begin());
301301

302302
io::saveItems(name, {itemMt, itemVt, itemDenoms});
303303
}

0 commit comments

Comments
 (0)