Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,12 @@ MatMulKleidiAIExecutor::MatMulKleidiAIExecutor(const FCAttrs& attrs,
if (!useDynamicQuant) {
auto dstDesc = originalWeightsDesc->cloneWithNewPrecision(memory.at(ARG_SRC)->getDescPtr()->getPrecision());
auto dnnlDstDesc = MemoryDescUtils::convertToDnnlMemoryDesc(dstDesc);
packedWeights = acl_fc_executor::reorderWeights(memory, context, aclfcAttrs, dnnlSrcDesc, dnnlDstDesc);
if (!attrs.weightsNonTransposed) {
dnnlDstDesc = acl_fc_executor::makeTransposedWeightDescriptor(dnnlDstDesc, dnnlSrcDesc);
aclfcAttrs.isWeightsRepacked = true;
}
MemoryCPtr packedWeights =
acl_fc_executor::reorderWeights(memory, context, aclfcAttrs, dnnlSrcDesc, dnnlDstDesc);

const size_t rhsPackedSize = kai_get_rhs_packed_size_rhs_pack_kxn_f32p8x1biasf32_f32_f32_neon(N, K);
auto rhsPackedDesc = std::make_shared<CpuBlockedMemoryDesc>(u8, Shape({rhsPackedSize}));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,6 @@ class MatMulKleidiAIExecutor : public Executor {
MemoryPtr biasMem;
MemoryPtr rhsPackedMem;
MemoryPtr lhsPackedMem;
MemoryCPtr packedWeights;
size_t M = 0UL, N = 0UL, K = 0UL;
size_t mr, nr, kr, sr;
// F32 Kernel block size
Expand Down
16 changes: 8 additions & 8 deletions src/plugins/intel_cpu/src/nodes/gathermatmul.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
// Copyright (C) 2018-2026 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "gathermatmul.h"

#include <oneapi/dnnl/dnnl_common_types.h>
Expand Down Expand Up @@ -46,7 +45,7 @@
#include "transformations/cpu_opset/common/op/batch_gather_matmul_compressed.hpp"
#include "transformations/utils/utils.hpp"
#include "utils/general_utils.h"

#ifdef OPENVINO_ARCH_X86_64
namespace ov::intel_cpu::node {

struct onednn_matmul_key {
Expand Down Expand Up @@ -299,7 +298,7 @@ bool GatherMatmul::isSupportedCompressedOperation([[maybe_unused]] const std::sh
[[maybe_unused]] size_t OC,
[[maybe_unused]] size_t G,
[[maybe_unused]] const Config& config) noexcept {
#ifdef OPENVINO_ARCH_X86_64
# ifdef OPENVINO_ARCH_X86_64
// copy paste from FullyConnected
try {
std::string errorMessage;
Expand Down Expand Up @@ -341,19 +340,19 @@ bool GatherMatmul::isSupportedCompressedOperation([[maybe_unused]] const std::sh
return false;
}
return true;
#else
# else
return false;
#endif
# endif
}

// Returns the element types accepted for compressed weights.
// When OPENVINO_ARCH_X86_64 is defined the list is {u8, i8, u4, i4}; otherwise it is empty.
// `apply_fp8` is not read anywhere in this body; it is marked [[maybe_unused]].
ov::element::TypeVector GatherMatmul::getSupportedCompressedWeightsTypes([[maybe_unused]] bool apply_fp8) {
using ov::element::Type_t;

// NOTE(review): every preprocessor directive below appears twice, once flush-left and once in the
// `# `-indented form. This looks like the removed and added lines of a diff shown together rather
// than intentional nesting -- confirm against the checked-in file.
#ifdef OPENVINO_ARCH_X86_64
# ifdef OPENVINO_ARCH_X86_64
return {Type_t::u8, Type_t::i8, Type_t::u4, Type_t::i4};
#else
# else
return {};
#endif
# endif
}

ov::element::TypeVector GatherMatmul::getSupportedCompressedActivationsTypes() {
Expand Down Expand Up @@ -860,3 +859,4 @@ bool GatherMatmul::created() const {
}

} // namespace ov::intel_cpu::node
#endif
20 changes: 20 additions & 0 deletions src/plugins/intel_cpu/src/nodes/gathermatmul.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,13 @@
#include <memory>
#include <oneapi/dnnl/dnnl.hpp>
#include <string>
#include <vector>

#include "cpu_memory.h"
#include "graph_context.h"
#include "node.h"
#include "nodes/executors/executor.hpp"
#include "nodes/executors/executor_factory.hpp"
#include "nodes/executors/memory_arguments.hpp"
#include "openvino/core/node.hpp"

Expand Down Expand Up @@ -54,6 +57,8 @@ class GatherMatmul : public Node {
WEIGHT_ZERO_POINTS,
};

#ifdef OPENVINO_ARCH_X86_64

class onednn_matmul;

using GemvImplPtr = std::shared_ptr<onednn_matmul>;
Expand All @@ -72,6 +77,21 @@ class GatherMatmul : public Node {
MemoryDescPtr m_tmpOutputDesc = nullptr;

bool bf16_amx_mode = false;
#else

ov::element::Type getRuntimePrecision() const override;
Algorithm algorithm = Algorithm::GatherMatmulDefault;
size_t numExperts = 0;

std::vector<ExecutorPtr> executor;
std::vector<MemoryArgs> memArgsFC;

MemoryPtr m_weightsMemory = nullptr;
MemoryPtr m_tmpInpBuffer = nullptr;
MemoryDescPtr m_tmpInputDesc = nullptr;
MemoryDescPtr m_tmpOutputDesc = nullptr;

#endif
Comment on lines +80 to +94
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some fields are clearly duplicated between the `#ifdef` and `#else` branches. Should we narrow the scope of the conditional so that the shared fields are declared only once?

};

} // namespace ov::intel_cpu::node
Loading
Loading