Skip to content

Commit 9590782

Browse files
olpipim and mryzhov authored
Add cache encryption to vlm sample (openvinotoolkit#2038)
[CVS-162990](https://jira.devtools.intel.com/browse/CVS-162990) Co-authored-by: Mikhail Ryzhov <mikhail.ryzhov@intel.com>
1 parent 70b767e commit 9590782

File tree

2 files changed

+55
-3
lines changed

2 files changed

+55
-3
lines changed

samples/cpp/visual_language_chat/encrypted_model_vlm.cpp

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ std::pair<std::string, ov::Tensor> decrypt_model(const std::filesystem::path& mo
1111
std::ifstream model_file(model_dir / model_file_name);
1212
std::ifstream weights_file();
1313
if (!model_file.is_open()) {
14-
throw std::runtime_error("Cannot open model or weights file");
14+
throw std::runtime_error("Cannot open model file");
1515
}
1616
std::string model_str((std::istreambuf_iterator<char>(model_file)), std::istreambuf_iterator<char>());
1717

@@ -30,6 +30,39 @@ ov::genai::Tokenizer decrypt_tokenizer(const std::filesystem::path& models_path)
3030
return ov::genai::Tokenizer(tok_model_str, tok_weights_tensor, detok_model_str, detok_weights_tensor);
3131
}
3232

33+
// XOR key used by the toy cache codec below.
// NOTE(review): fixed-key XOR is a demonstration cipher only — it provides
// no real confidentiality; replace with proper encryption in production.
static const char codec_key[] = {0x30, 0x60, 0x70, 0x02, 0x04, 0x08, 0x3F, 0x6F, 0x72, 0x74, 0x78, 0x7F};

// Applies a repeating-key XOR to source_str and returns the transformed copy.
// The transform is an involution — codec_xor(codec_xor(s)) == s — so the same
// routine serves as both the encryption and the decryption callback.
std::string codec_xor(const std::string& source_str) {
    constexpr std::size_t key_size = sizeof(codec_key);
    std::string dst_str = source_str;
    // Use an unsigned size_t index: the original used a plain int counter,
    // which mixes signed/unsigned in the modulo and could overflow for
    // inputs larger than INT_MAX bytes.
    for (std::size_t i = 0; i < dst_str.size(); ++i) {
        dst_str[i] ^= codec_key[i % key_size];
    }
    return dst_str;
}
45+
46+
std::string encryption_callback(const std::string& source_str) {
47+
return codec_xor(source_str);
48+
}
49+
50+
std::string decryption_callback(const std::string& source_str) {
51+
return codec_xor(source_str);
52+
}
53+
54+
// Builds the plugin configuration for an encrypted compiled-model cache:
// cache directory, XOR-based encrypt/decrypt callbacks (example codec only)
// and OPTIMIZE_SIZE cache mode.
auto get_config_for_cache_encryption() {
    // Wire up the symmetric XOR codec as the cache blob transform.
    ov::EncryptionCallbacks encryption_callbacks;
    encryption_callbacks.encrypt = encryption_callback;
    encryption_callbacks.decrypt = decryption_callback;

    ov::AnyMap config;
    config.insert({ov::cache_dir("llm_cache")});
    config.insert(ov::cache_encryption_callbacks(encryption_callbacks));
    config.insert(ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE));
    return config;
}
65+
3366
// Streaming callback: prints one generated subword and flushes immediately.
// Returns true (i.e. "stop streaming") only if the output stream has failed.
bool print_subword(std::string&& subword) {
    std::cout << subword << std::flush;
    return !std::cout;
}
@@ -61,7 +94,7 @@ int main(int argc, char* argv[]) try {
6194
if (device == "GPU") {
6295
// Cache compiled models on disk for GPU to save time on the
6396
// next run. It's not beneficial for CPU.
64-
enable_compile_cache.insert({ov::cache_dir("vlm_cache")});
97+
enable_compile_cache = get_config_for_cache_encryption();
6598
}
6699
ov::genai::VLMPipeline pipe(models_map, tokenizer, models_path, device, enable_compile_cache);
67100

samples/python/visual_language_chat/encrypted_model_vlm.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,25 @@ def read_images(path: str) -> list[Tensor]:
7474
return [read_image(path)]
7575

7676

77+
# Example cache "encryption" based on base64.
# NOTE(review): base64 is an encoding, not encryption — demonstration only.
import base64


def encrypt_base64(src: bytes):
    """Encode raw cache bytes with base64 (demo stand-in for encryption)."""
    return base64.b64encode(src)
82+
83+
84+
def decrypt_base64(src: bytes):
    """Decode base64-encoded cache bytes (inverse of encrypt_base64)."""
    decoded = base64.b64decode(src)
    return decoded
86+
87+
88+
def get_config_for_cache_encryption():
    """Return plugin config enabling an encrypted compiled-model cache."""
    return {
        "CACHE_DIR": "llm_cache",
        # encrypt/decrypt callbacks the plugin applies to cache blobs
        "CACHE_ENCRYPTION_CALLBACKS": [encrypt_base64, decrypt_base64],
        "CACHE_MODE": "OPTIMIZE_SIZE",
    }
94+
95+
7796
def main():
7897
parser = argparse.ArgumentParser()
7998
parser.add_argument('model_dir')
@@ -100,7 +119,7 @@ def main():
100119
if "GPU" == device:
101120
# Cache compiled models on disk for GPU to save time on the
102121
# next run. It's not beneficial for CPU.
103-
enable_compile_cache["CACHE_DIR"] = "vlm_cache"
122+
enable_compile_cache = get_config_for_cache_encryption()
104123

105124
pipe = openvino_genai.VLMPipeline(models_map, tokenizer, args.model_dir, device, **enable_compile_cache)
106125

0 commit comments

Comments
 (0)