mozilla
diff --git a/‎inference/marian-fork/src/common/binary.cpp‎
Lines changed: 51 additions & 20 deletions b/‎inference/marian-fork/src/common/binary.cpp‎
Lines changed: 51 additions & 20 deletions
diff --git a/‎inference/marian-fork/src/common/io.cpp‎
Lines changed: 7 additions & 5 deletions b/‎inference/marian-fork/src/common/io.cpp‎
Lines changed: 7 additions & 5 deletions
diff --git a/‎inference/marian-fork/src/common/io_item.h‎
Lines changed: 63 additions & 10 deletions b/‎inference/marian-fork/src/common/io_item.h‎
Lines changed: 63 additions & 10 deletions
diff --git a/‎inference/marian-fork/src/graph/node_initializers.cpp‎
Lines changed: 1 addition & 2 deletions b/‎inference/marian-fork/src/graph/node_initializers.cpp‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎inference/marian-fork/src/models/amun.h‎
Lines changed: 1 addition & 1 deletion b/‎inference/marian-fork/src/models/amun.h‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎inference/marian-fork/src/models/nematus.h‎
Lines changed: 1 addition & 1 deletion b/‎inference/marian-fork/src/models/nematus.h‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎inference/marian-fork/src/optimizers/optimizers.cpp‎
Lines changed: 8 additions & 8 deletions b/‎inference/marian-fork/src/optimizers/optimizers.cpp‎
Lines changed: 8 additions & 8 deletions
@@ -28,34 +28,50 @@ const T* get(const void*& current, uint64_t num = 1) {
 }
 
 void loadItems(const void* current, std::vector<io::Item>& items, bool mapped) {
+  uint64_t totalBytesLoaded = 0;  // Running total of bytes accounted for; reported by the LOG call at the end
+
   uint64_t binaryFileVersion = *get<uint64_t>(current);
   ABORT_IF(binaryFileVersion != BINARY_FILE_VERSION,
            "Binary file versions do not match: {} (file) != {} (expected)",
            binaryFileVersion,
            BINARY_FILE_VERSION);
 
-  uint64_t numHeaders = *get<uint64_t>(current); // number of item headers that follow
-  const Header* headers = get<Header>(current, numHeaders); // read that many headers
+  totalBytesLoaded += sizeof(uint64_t);  // Account for binaryFileVersion
+
+  uint64_t numHeaders = *get<uint64_t>(current);  // number of item headers that follow
+  totalBytesLoaded += sizeof(uint64_t);           // Account for numHeaders
+
+  const Header* headers = get<Header>(current, numHeaders);  // read that many headers
+  totalBytesLoaded += sizeof(Header) * numHeaders;           // Account for headers
+
+  if(items.size() == numHeaders) {
+    // NOTE(review): only the item count is compared — presumably items was filled by an earlier call; confirm.
+    return;
+  }
 
   // prepopulate items with meta data from headers
   items.resize(numHeaders);
   for(int i = 0; i < numHeaders; ++i) {
     items[i].type = (Type)headers[i].type;
     items[i].name = get<char>(current, headers[i].nameLength);
+    totalBytesLoaded += headers[i].nameLength;  // Account for item name bytes
     items[i].mapped = mapped;
   }
 
   // read in actual shape and data
   for(int i = 0; i < numHeaders; ++i) {
     uint64_t len = headers[i].shapeLength;
-    items[i].shape.resize(len); 
-    const int* arr = get<int>(current, len); // read shape
-    std::copy(arr, arr + len, items[i].shape.begin()); // copy to Item::shape 
+    items[i].shape.resize(len);
+    const int* arr = get<int>(current, len);            // read shape
+    totalBytesLoaded += len * sizeof(int);              // Account for shape bytes
+    std::copy(arr, arr + len, items[i].shape.begin());  // copy to Item::shape
   }
 
   // move by offset bytes, aligned to 256-bytes boundary
   uint64_t offset = *get<uint64_t>(current);
+  totalBytesLoaded += sizeof(uint64_t);  // Account for offset metadata
   get<char>(current, offset);
+  totalBytesLoaded += offset;  // Account for offset bytes
 
   for(int i = 0; i < numHeaders; ++i) {
     //if(items[i].mapped && !isIntgemm(items[i].type)) { // memory-mapped, hence only set pointer. At the moment it intgemm matrices can't be used without processing
@@ -65,29 +81,44 @@ void loadItems(const void* current, std::vector<io::Item>& items, bool mapped) {
                               // If this is not set, we trigger node_initializers.cpp:186. This one just assigns the memory ptr to the tensor if set to true, but at the moment
                               // We are preparing some things on demand (the bottom portion of this code). Once we stop doing that, we can use the full mmap codepath
                               // Also when using the full mmap codepath, we need to uncomment expression_graph.h:582
-    uint64_t len = headers[i].dataLength;
-    items[i].bytes.resize(len);
-    const char* ptr = get<char>(current, len);
-    if (matchType<intgemm8>(items[i].type)) {
-      if (items[i].name.find("Wemb") != std::string::npos) { // Since Wemb need to be dequantised, we have a special case for them
+
+    auto resize = [&](uint64_t len) {
+      items[i].bytes->resize(len);
+      totalBytesLoaded += len;  // counts the resized buffer: shape.elements() * sizeof(float) on the Wemb paths, headers[i].dataLength otherwise
+    };
+    const char* ptr = get<char>(current, headers[i].dataLength);
+
+    if(matchType<intgemm8>(items[i].type)) {
+      if(items[i].name.find("Wemb") != std::string::npos) {  // Since Wemb needs to be dequantised,
+                                                             // we have a special case for it
         items[i].type = Type::float32;
-        items[i].bytes.resize(items[i].shape.elements()*sizeof(float)); // We should have an extra float at the back but that requires a different format, due to allocator work
+        resize(items[i].shape.elements()
+               * sizeof(float));  // We should have an extra float at the back but that requires a
+                                  // different format, due to allocator work
         cpu::integer::unquantizeWemb<Type::int8>(items[i], ptr);
       } else {
+        resize(headers[i].dataLength);
         cpu::integer::prepareAndTransposeB<Type::int8>(items[i], ptr);
       }
-    } else if (matchType<intgemm16>(items[i].type)) {
-      if (items[i].name.find("Wemb") != std::string::npos) { // Since Wemb need to be dequantised, we have a special case for them
+    } else if(matchType<intgemm16>(items[i].type)) {
+      if(items[i].name.find("Wemb") != std::string::npos) {  // Since Wemb needs to be dequantised,
+                                                             // we have a special case for it
         items[i].type = Type::float32;
-        items[i].bytes.resize(items[i].shape.elements()*sizeof(float)); // We should have an extra float at the back but that requires a different format, due to allocator work
+        resize(items[i].shape.elements()
+               * sizeof(float));  // We should have an extra float at the back but that requires a
+                                  // different format, due to allocator work
         cpu::integer::unquantizeWemb<Type::int16>(items[i], ptr);
       } else {
+        resize(headers[i].dataLength);
         cpu::integer::prepareAndTransposeB<Type::int16>(items[i], ptr);
       }
     } else {
-      std::copy(ptr, ptr + len, items[i].bytes.begin());
+      resize(headers[i].dataLength);
+      std::copy(ptr, ptr + headers[i].dataLength, items[i].bytes->begin());
     }
   }
+
+  LOG(info, "[memory] Model data loaded in: {}", totalBytesLoaded);  // the logged value is a byte count
 }
 
 void loadItems(const std::string& fileName, std::vector<io::Item>& items) {
@@ -145,7 +176,7 @@ void saveItems(const std::string& fileName,
     headers.push_back(Header{item.name.size() + 1,
                              (uint64_t)item.type,
                              item.shape.size(),
-                             item.bytes.size()}); // binary item size with padding, will be 256-byte-aligned
+                             item.bytes->size()}); // binary item size with padding, will be 256-byte-aligned
   }
 
   uint64_t headerSize = headers.size();
@@ -173,10 +204,10 @@ void saveItems(const std::string& fileName,
 
   // Write out all values
   for(const auto& item : items)
-    pos += out.write(item.data(), item.bytes.size()); // writes out data with padding, keeps 256-byte boundary. 
-                                                      // Amazingly this is binary-compatible with V1 and aligned and 
-                                                      // non-aligned models can be read with the same procedure.
-                                                      // No version-bump required. Gets 5-8% of speed back when mmapped.
+    pos += out.write(item.data(), item.bytes->size()); // writes out data with padding, keeps 256-byte boundary. 
+                                                       // Amazingly this is binary-compatible with V1 and aligned and 
+                                                       // non-aligned models can be read with the same procedure.
+                                                       // No version-bump required. Gets 5-8% of speed back when mmapped.
 }
 
 }  // namespace binary
 
@@ -77,10 +77,10 @@ void addMetaToItems(const std::string& meta,
   // increase size by 1 to add \0
   item.shape = Shape({(int)meta.size() + 1});
 
-  item.bytes.resize(item.shape.elements());
-  std::copy(meta.begin(), meta.end(), item.bytes.begin());
+  item.bytes->resize(item.shape.elements());
+  std::copy(meta.begin(), meta.end(), item.bytes->begin());
   // set string terminator
-  item.bytes.back() = '\0';
+  item.bytes->back() = '\0';
 
   item.type = Type::int8;
 
@@ -104,12 +104,14 @@ void loadItemsFromNpz(const std::string& fileName, std::vector<Item>& items) {
     Item item;
     item.name = it.first;
     item.shape = shape;
-    item.bytes.swap(it.second->bytes);
+    item.bytes = std::make_shared<std::vector<char>>(std::move(it.second->bytes));
+
     items.emplace_back(std::move(item));
   }
 }
 
 std::vector<Item> loadItems(const std::string& fileName) {
+  LOG(info, "Inside of loadItems from fileName");
   std::vector<Item> items;
   if(isNpz(fileName)) {
     loadItemsFromNpz(fileName, items);
@@ -155,7 +157,7 @@ void saveItemsNpz(const std::string& fileName, const std::vector<Item>& items) {
     else
       ABORT("Other types not supported yet");
 
-    npzItems.emplace_back(item.name, item.bytes, shape, type, sizeOf(item.type));
+    npzItems.emplace_back(item.name, *item.bytes, shape, type, sizeOf(item.type));
   }
   cnpy::npz_save(fileName, npzItems);
 }
 
@@ -9,19 +9,72 @@ namespace marian {
 namespace io {
 
 struct Item {
-  std::vector<char> bytes;
+  std::shared_ptr<std::vector<char>> bytes;
   const char* ptr{0};
   bool mapped{false};
 
   std::string name;
   Shape shape;
   Type type{Type::float32};
 
+  // Default constructor: allocates an empty byte buffer, so bytes is non-null on a freshly built Item
+  Item() : bytes(std::make_shared<std::vector<char>>()) {}
+
+  // Copy constructor: copies the shared_ptr, so the new Item shares other's byte buffer (no deep copy of the bytes)
+  Item(const Item& other)
+      : bytes(other.bytes),
+        ptr(other.ptr),
+        mapped(other.mapped),
+        name(other.name),
+        shape(other.shape),
+        type(other.type) {}
+
+  // Copy assignment operator: afterwards this Item shares other's byte buffer (shared_ptr copy); remaining fields are copied
+  Item& operator=(const Item& other) {
+    if(this != &other) {
+      bytes = other.bytes;
+      ptr = other.ptr;
+      mapped = other.mapped;
+      name = other.name;
+      shape = other.shape;
+      type = other.type;
+    }
+    return *this;
+  }
+
+  // Move constructor: takes over other's buffer; other.bytes is left null (data() on an unmapped moved-from Item would dereference it)
+  Item(Item&& other) noexcept
+      : bytes(std::move(other.bytes)),
+        ptr(other.ptr),
+        mapped(other.mapped),
+        name(std::move(other.name)),
+        shape(std::move(other.shape)),
+        type(other.type) {
+    other.ptr = nullptr;
+    other.mapped = false;
+  }
+
+  // Move assignment operator: takes over other's buffer; other.bytes is left null and other.ptr / other.mapped are reset
+  Item& operator=(Item&& other) noexcept {
+    if(this != &other) {
+      bytes = std::move(other.bytes);
+      ptr = other.ptr;
+      mapped = other.mapped;
+      name = std::move(other.name);
+      shape = std::move(other.shape);
+      type = other.type;
+
+      other.ptr = nullptr;
+      other.mapped = false;
+    }
+    return *this;
+  }
+
   const char* data() const {
     if(mapped)
       return ptr;
     else
-      return bytes.data();
+      return bytes->data();  // requires a non-null bytes pointer; a moved-from Item holds an empty one
   }
 
   size_t size() const { // @TODO: review this again for 256-bytes boundary alignment
@@ -40,30 +93,30 @@ struct Item {
 
     // cut to size (get rid of padding if any) to make append operation work correctly
     size_t bytesWithoutPadding = shape.elements() * sizeOf(type);
-    bytes.resize(bytesWithoutPadding);
+    bytes->resize(bytesWithoutPadding);
 
     shape.set(-1, shape.elements() + other.shape.elements());
 
     size_t addbytesWithoutPadding = other.shape.elements() * sizeOf(other.type); // ignore padding if any
-    bytes.insert(bytes.end(), other.bytes.begin(), other.bytes.begin() + addbytesWithoutPadding);
+    bytes->insert(bytes->end(), other.bytes->begin(), other.bytes->begin() + addbytesWithoutPadding);
 
     // grow to align to 256 bytes boundary (will be undone when more pieces are appended)
-    size_t multiplier = (size_t)ceil((float)bytes.size() / (float)256);
-    bytes.resize(multiplier * 256);
+    size_t multiplier = (size_t)ceil((float)bytes->size() / (float)256);
+    bytes->resize(multiplier * 256);
   }
 
   template <typename From, typename To>
   void convertFromTo() {
     size_t elements = size() / sizeof(From);
     size_t newSize = elements * sizeof(To);
-    std::vector<char> newBytes(newSize);
+    auto newBytes = std::make_shared<std::vector<char>>(newSize);  // scratch buffer that receives the converted values
 
-    From* in = (From*)bytes.data();
-    To* out = (To*)newBytes.data();
+    From* in = (From*)bytes->data();
+    To* out = (To*)newBytes->data();
     for(int i = 0; i < elements; ++i)
       out[i] = (To)in[i];
 
-    bytes.swap(newBytes);
+    bytes->swap(*newBytes);  // swaps the vector contents in place, so every Item sharing this buffer sees the converted bytes
   }
 
   template <typename T>
 
@@ -27,7 +27,7 @@ class LambdaInitConvert : public NodeInitializer {
   private:
     std::function<void(Tensor)> lambda_;
     Type intermediateType_; // is used for the creation of a temporary intermedia tensor on which the lambda actually operates.
-                            // This tensor is then automatically cast and copied to the type of the actual tensor. 
+                            // This tensor is then automatically cast and copied to the type of the actual tensor.
 
   public:
     LambdaInitConvert(std::function<void(Tensor)>&& lambda,
@@ -253,7 +253,6 @@ template Ptr<NodeInitializer> range<IndexType>(IndexType begin, IndexType end, I
 }  // namespace inits
 }  // namespace marian
 
-
 #if BLAS_FOUND && !WASM_COMPATIBLE_SOURCE
 #include "faiss/VectorTransform.h"
 
 
@@ -180,7 +180,7 @@ class Amun : public EncoderDecoder {
     ioItems.emplace_back();
     ioItems.back().name = "decoder_c_tt";
     ioItems.back().shape = Shape({1, 0});
-    ioItems.back().bytes.emplace_back((char)0);
+    ioItems.back().bytes->emplace_back((char)0);
 
     io::addMetaToItems(getModelParametersAsString(), "special:model.yml", ioItems);
     io::saveItems(name, ioItems);
 
@@ -78,7 +78,7 @@ class Nematus : public EncoderDecoder {
     ioItems.emplace_back();
     ioItems.back().name = "decoder_c_tt";
     ioItems.back().shape = Shape({1, 0});
-    ioItems.back().bytes.emplace_back((char)0);
+    ioItems.back().bytes->emplace_back((char)0);
 
     io::addMetaToItems(getModelParametersAsString(), "special:model.yml", ioItems);
     io::saveItems(name, ioItems);
 
@@ -108,8 +108,8 @@ void Adagrad::save(const std::string& name,
   item.name = "adagrad_gt";
   item.shape = Shape({1, (int)vGt.size()});
   item.type = Type::float32;
-  item.bytes.resize(vGt.size() * sizeOf(item.type));
-  std::copy((char*)vGt.data(), (char*)(vGt.data() + vGt.size()), item.bytes.begin());
+  item.bytes->resize(vGt.size() * sizeOf(item.type));
+  std::copy((char*)vGt.data(), (char*)(vGt.data() + vGt.size()), item.bytes->begin());
 
   io::saveItems(name, {item});
 }
@@ -277,27 +277,27 @@ void Adam::save(const std::string& name,
   itemMt.name = "adam_mt";
   itemMt.shape = Shape({1, (int)vMt.size()});
   itemMt.type = Type::float32;
-  itemMt.bytes.resize(vMt.size() * sizeOf(itemMt.type));
+  itemMt.bytes->resize(vMt.size() * sizeOf(itemMt.type));
   std::copy(
-      (char*)vMt.data(), (char*)(vMt.data() + vMt.size()), itemMt.bytes.begin());
+      (char*)vMt.data(), (char*)(vMt.data() + vMt.size()), itemMt.bytes->begin());
 
   io::Item itemVt;
   itemVt.name = "adam_vt";
   itemVt.shape = Shape({1, (int)vVt.size()});
   itemVt.type = Type::float32;
-  itemVt.bytes.resize(vVt.size() * sizeOf(itemVt.type));
+  itemVt.bytes->resize(vVt.size() * sizeOf(itemVt.type));
   std::copy(
-      (char*)vVt.data(), (char*)(vVt.data() + vVt.size()), itemVt.bytes.begin());
+      (char*)vVt.data(), (char*)(vVt.data() + vVt.size()), itemVt.bytes->begin());
 
   // @TODO: this pattern is duplicated several times; refactor it
   std::array<double, 2> vDenoms{denom1_, denom2_};
   io::Item itemDenoms;
   itemDenoms.name = "adam_denoms";
   itemDenoms.shape = Shape({1, (int)vDenoms.size()});
   itemDenoms.type = Type::float64;
-  itemDenoms.bytes.resize(vDenoms.size() * sizeOf(itemDenoms.type));
+  itemDenoms.bytes->resize(vDenoms.size() * sizeOf(itemDenoms.type));
   std::copy(
-      (char*)vDenoms.data(), (char*)(vDenoms.data() + vDenoms.size()), itemDenoms.bytes.begin());
+      (char*)vDenoms.data(), (char*)(vDenoms.data() + vDenoms.size()), itemDenoms.bytes->begin());
 
   io::saveItems(name, {itemMt, itemVt, itemDenoms});
 }