Skip to content

Commit bcf59f3

Browse files
committed
serialize kv-cache precision information for ScaledDotProductAttention
1 parent 6b90078 commit bcf59f3

File tree

1 file changed

+8
-0
lines changed

1 file changed

+8
-0
lines changed

src/plugins/intel_cpu/src/graph_dumper.cpp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222

2323
#include "cpu_types.h"
2424
#include "node.h"
25+
#include "nodes/scaled_attn.h"
2526
#include "onednn/dnnl.h"
2627
#include "openvino/core/except.hpp"
2728
#include "openvino/core/model.hpp"
@@ -122,6 +123,13 @@ std::map<std::string, std::string> extract_node_metadata(const NodePtr& node) {
122123
serialization_info[ov::exec_model_info::EXECUTION_ORDER] = std::to_string(node->getExecIndex());
123124

124125
serialization_info[ov::exec_model_info::RUNTIME_PRECISION] = node->getRuntimePrecision().get_type_name();
126+
// Record the KV-cache precision of a ScaledDotProductAttention node in its serialized metadata.
127+
if (node->getType() == Type::ScaledDotProductAttention) {
128+
auto* sdpa_node = dynamic_cast<ov::intel_cpu::node::ScaledDotProductAttention*>(node.get());
129+
if (sdpa_node) {
130+
serialization_info["kv_cache_precision"] = sdpa_node->getKVCachePrecision().get_type_name();
131+
}
132+
}
125133

126134
return serialization_info;
127135
}

0 commit comments

Comments
 (0)