You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Improve the memory of the Wasm inference engine (#1135)
* Add a clear method to AlignedMemory, since it is only used to transfer
memory from the Wasm interface into the rest of the code. Without clearing,
that memory is retained for the lifetime of the application.
* Apply the tensor allocator size changes to Wasm only
* Update the build-wasm script to use proper loggers and add a --fast flag
* Re-work marian::io::binary::loadItems to avoid unnecessary copies
* Add a shared pointer for the items
* Address review comments
totalBytesLoaded += len * sizeof(int); // Account for shape bytes
67
+
std::copy(arr, arr + len, items[i].shape.begin()); // copy to Item::shape
54
68
}
55
69
56
70
// move by offset bytes, aligned to 256-bytes boundary
57
71
uint64_t offset = *get<uint64_t>(current);
72
+
totalBytesLoaded += sizeof(uint64_t); // Account for offset metadata
58
73
get<char>(current, offset);
74
+
totalBytesLoaded += offset; // Account for offset bytes
59
75
60
76
for(int i = 0; i < numHeaders; ++i) {
61
77
//if(items[i].mapped && !isIntgemm(items[i].type)) { // memory-mapped, hence only set pointer. At the moment intgemm matrices can't be used without processing
if(items[i].name.find("Wemb") != std::string::npos) { // Since Wemb need to be dequantised,
93
+
// we have a special case for them
73
94
items[i].type = Type::float32;
74
-
items[i].bytes.resize(items[i].shape.elements()*sizeof(float)); // We should have an extra float at the back but that requires a different format, due to allocator work
95
+
resize(items[i].shape.elements()
96
+
* sizeof(float)); // We should have an extra float at the back but that requires a
if (items[i].name.find("Wemb") != std::string::npos) { // Since Wemb need to be dequantised, we have a special case for them
103
+
} elseif(matchType<intgemm16>(items[i].type)) {
104
+
if(items[i].name.find("Wemb") != std::string::npos) { // Since Wemb need to be dequantised,
105
+
// we have a special case for them
81
106
items[i].type = Type::float32;
82
-
items[i].bytes.resize(items[i].shape.elements()*sizeof(float)); // We should have an extra float at the back but that requires a different format, due to allocator work
107
+
resize(items[i].shape.elements()
108
+
* sizeof(float)); // We should have an extra float at the back but that requires a
0 commit comments