diff --git a/src/plugins/intel_cpu/src/nodes/concat.cpp b/src/plugins/intel_cpu/src/nodes/concat.cpp index 07970e6192bf3a..a0a4591376e2ec 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.cpp +++ b/src/plugins/intel_cpu/src/nodes/concat.cpp @@ -34,6 +34,9 @@ #include "graph_context.h" #include "memory_desc/cpu_memory_desc.h" #include "node.h" +#include "nodes/executors/executor.hpp" +#include "nodes/executors/executor_factory.hpp" +#include "nodes/executors/memory_arguments.hpp" #include "nodes/node_config.h" #include "onednn/dnnl.h" #include "openvino/core/except.hpp" @@ -92,6 +95,7 @@ Concat::Concat(const std::shared_ptr& op, const GraphContext::CPtr& co } CPU_NODE_ASSERT(axis < static_cast(inRank) && axis >= 0, "has invalid value of axis parameter: ", axis); this->axis = axis; + m_attrs.axis = axis; } void Concat::getSupportedDescriptors() { @@ -242,12 +246,40 @@ void Concat::initSupportedPrimitiveDescriptors() { // Optimized inplace case for (auto refPdIndex : pdIndexesToReuse) { auto config = supportedPrimitiveDescriptors[refPdIndex].getConfig(); - ; for (auto& inConf : config.inConfs) { inConf.inPlace(0); } supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } + + auto pushExecutorDesc = [&](LayoutType layoutType) { + NodeConfig nodeConfig; + nodeConfig.outConfs.resize(1); + nodeConfig.inConfs.resize(getParentEdges().size()); + + MemoryDescArgs descs; + descs.reserve(getParentEdges().size() + 1); + for (size_t i = 0; i < getParentEdges().size(); ++i) { + auto srcDesc = creatorsMap.at(layoutType)->createSharedDesc(inputPrecision, getInputShapeAtPort(i)); + nodeConfig.inConfs[i].setMemDesc(srcDesc); + nodeConfig.inConfs[i].inPlace(-1); + nodeConfig.inConfs[i].constant(false); + descs[ARG_SRC + i] = srcDesc; + } + + auto dstDesc = creatorsMap.at(layoutType)->createSharedDesc(outputPrecision, getOutputShapeAtPort(0)); + nodeConfig.outConfs[0].setMemDesc(dstDesc); + nodeConfig.outConfs[0].inPlace(-1); + nodeConfig.outConfs[0].constant(false); + descs[ARG_DST] = dstDesc; + + if (ExecutorFactory::hasSuitableImplementation(m_attrs, descs, memoryFormatFilter)) { + supportedPrimitiveDescriptors.emplace_back(nodeConfig, impl_desc_type::undef); + } + }; + + pushExecutorDesc(LayoutType::ncsp); + pushExecutorDesc(LayoutType::nspc); } void Concat::selectOptimalPrimitiveDescriptor() { @@ -354,7 +386,13 @@ void Concat::selectOptimalPrimitiveDescriptor() { return; } - // if there are more than one PD with similar data layouts - select the optimized one + for (auto indx : canSelectPrimitive) { + if (supportedPrimitiveDescriptors[indx].getImplementationType() == impl_desc_type::undef) { + selectPrimitiveDescriptorByIndex(static_cast(indx)); + return; + } + } + for (auto indx : canSelectPrimitive) { if (supportedPrimitiveDescriptors[indx].getImplementationType() == impl_desc_type::unknown) { selectPrimitiveDescriptorByIndex(static_cast(indx)); @@ -362,6 +400,13 @@ void Concat::selectOptimalPrimitiveDescriptor() { } } + for (auto indx : canSelectPrimitive) { + if (supportedPrimitiveDescriptors[indx].getImplementationType() == impl_desc_type::ref) { + selectPrimitiveDescriptorByIndex(static_cast(indx)); + return; + } + } + // if there are no matching data layouts, select first optimized implementation for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); i++) { if (canBeInPlace && supportedPrimitiveDescriptors[i].getImplementationType() == impl_desc_type::unknown) { @@ -389,10 +434,29 @@ void Concat::prepareParams() { return; } + auto* selectedPd = getSelectedPrimitiveDescriptor(); + CPU_NODE_ASSERT(selectedPd, "Preferable primitive descriptor is not set."); + + if (useExecutor && m_executor) { + for (size_t i = 0; i < getParentEdges().size(); ++i) { + m_memory[ARG_SRC + i] = getSrcMemoryAtPort(i); + } + m_memory[ARG_DST] = getDstMemoryAtPort(0); + + if (m_executor->update(m_memory)) { + selectedPd->setImplementationType(m_executor->implType()); + return; + } + + // Fallback to oneDNN/ref concat when executor update is not applicable for runtime shapes. + useExecutor = false; + m_executor.reset(); + selectedPd->setImplementationType(impl_desc_type::ref); + } + const auto& dstMemPtr = getDstMemoryAtPort(0); CPU_NODE_ASSERT(dstMemPtr && dstMemPtr->isDefined(), "Destination memory is undefined."); auto dstMemDesc = dstMemPtr->getDescWithType(); - CPU_NODE_ASSERT(getSelectedPrimitiveDescriptor(), "Preferable primitive descriptor is not set."); const auto& outputStrides = dstMemDesc->getStrides(); size_t curConcatOffset = 0; @@ -502,6 +566,46 @@ size_t Concat::inverseOrder(const VectorDims& order, size_t axis) { return -1; } +void Concat::createPrimitive() { + auto* selectedPd = getSelectedPrimitiveDescriptor(); + CPU_NODE_ASSERT(selectedPd, "Preferable primitive descriptor is not set."); + + if (!isInPlace()) { + m_memory.clear(); + m_memory.reserve(getParentEdges().size() + 1); + for (size_t i = 0; i < getParentEdges().size(); ++i) { + m_memory[ARG_SRC + i] = getSrcMemoryAtPort(i); + } + m_memory[ARG_DST] = getDstMemoryAtPort(0); + + useExecutor = selectedPd->getImplementationType() == impl_desc_type::undef && !canOptimizeNspc; + m_executor.reset(); + + if (useExecutor) { + MemoryDescArgs descs; + descs.reserve(m_memory.size()); + for (const auto& [arg, mem] : m_memory) { + descs[arg] = mem->getDescPtr(); + } + + try { + auto executionContext = std::make_shared(context, getImplPriority()); + auto factory = std::make_shared>(m_attrs, + executionContext, + descs, + memoryFormatFilter); + m_executor = factory->make(m_memory); + selectedPd->setImplementationType(m_executor->implType()); + } catch (...) { + useExecutor = false; + m_executor.reset(); + } + } + } + + Node::createPrimitive(); +} + void Concat::initOptimalPrimitiveDescriptor() { auto* selected_pd = getSelectedPrimitiveDescriptor(); CPU_NODE_ASSERT(selected_pd, "Preferable primitive descriptor is not set."); @@ -525,6 +629,8 @@ void Concat::initOptimalPrimitiveDescriptor() { } } + useExecutor = selected_pd->getImplementationType() == impl_desc_type::undef; + // block layout may have axis greater than rank, disable ref_concat auto* primDesc = getSelectedPrimitiveDescriptor(); auto* memDesc = primDesc->getConfig().outConfs[0].getMemDesc()->as(); @@ -558,6 +664,11 @@ void Concat::execute(const dnnl::stream& strm) { return; } + if (useExecutor && m_executor) { + m_executor->execute(m_memory); + return; + } + if (canOptimize1DCase) { exec1DCase(); return; diff --git a/src/plugins/intel_cpu/src/nodes/concat.h b/src/plugins/intel_cpu/src/nodes/concat.h index cfbd7aa2830466..bd67622de7c123 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.h +++ b/src/plugins/intel_cpu/src/nodes/concat.h @@ -14,6 +14,9 @@ #include "edge.h" #include "graph_context.h" #include "node.h" +#include "nodes/executors/concat.hpp" +#include "nodes/executors/executor.hpp" +#include "nodes/executors/memory_arguments.hpp" #include "openvino/core/node.hpp" #include "openvino/core/type/element_type.hpp" @@ -41,6 +44,7 @@ class Concat : public Node { [[nodiscard]] bool isExecutable() const override; [[nodiscard]] bool needPrepareParams() const override; void prepareParams() override; + void createPrimitive() override; // TODO: Move to base Node class when more nodes support fuse convert bool supportConvertFusion() const { return supportFuseConvert; @@ -70,6 +74,10 @@ class Concat : public Node { bool doFuseConvert = false; // whether to perform FP16 to FP32 conversion static constexpr size_t MAX_RANK_REF = 6; dnnl::primitive prim; + bool useExecutor = false; + ConcatAttrs m_attrs; + MemoryArgs m_memory; + ExecutorPtr m_executor = nullptr; }; } // namespace ov::intel_cpu::node diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_concat.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_concat.cpp new file mode 100644 index 00000000000000..4cf31fb514e957 --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_concat.cpp @@ -0,0 +1,213 @@ +// Copyright (C) 2018-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "acl_concat.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "acl_utils.hpp" +#include "memory_desc/cpu_memory_desc.h" +#include "nodes/executors/concat.hpp" +#include "nodes/executors/concat_config.hpp" +#include "nodes/executors/executor.hpp" +#include "nodes/executors/memory_arguments.hpp" +#include "openvino/core/except.hpp" +#include "openvino/core/type/element_type.hpp" +#include "utils/debug_capabilities.h" + +namespace ov::intel_cpu { + +namespace { + +template +std::vector getSourceArgIds(const T& args) { + std::vector sourceIds; + for (int argId = ARG_SRC;; ++argId) { + if (args.find(argId) == args.end()) { + break; + } + sourceIds.push_back(argId); + } + return sourceIds; +} + +bool isSupportedCommon(const ConcatAttrs& attrs, + const std::vector& srcDescs, + const MemoryDescPtr& dstDesc, + LayoutType expectedLayout) { + if (srcDescs.size() < 2 || !dstDesc) { + return false; + } + + const auto& firstDesc = srcDescs.front(); + const auto rank = firstDesc->getShape().getRank(); + if (rank == Shape::UNDEFINED_DIM || rank == 0 || rank > 4 || attrs.axis >= rank) { + return false; + } + + const auto precision = firstDesc->getPrecision(); + if (precision != ov::element::f16 && precision != ov::element::f32) { + return false; + } + + if (expectedLayout != LayoutType::ncsp && expectedLayout != LayoutType::nspc) { + return false; + } + + if (!firstDesc->hasLayoutType(expectedLayout)) { + return false; + } + + for (const auto& srcDesc : srcDescs) { + if (srcDesc->getPrecision() != precision || srcDesc->getShape().getRank() != rank || + !srcDesc->hasLayoutType(expectedLayout)) { + return false; + } + } + + return dstDesc->getPrecision() == precision && dstDesc->getShape().getRank() == rank && + dstDesc->hasLayoutType(expectedLayout); +} + +} // namespace + +AclConcatExecutor::AclConcatExecutor(const ConcatAttrs& attrs, + const MemoryArgs& memory, + [[maybe_unused]] const ExecutorContext::CPtr& context) + : m_attrs(attrs) { + const auto srcArgIds = getSourceArgIds(memory); + OPENVINO_ASSERT(!srcArgIds.empty(), "AclConcatExecutor requires at least one source tensor"); + + const auto& firstSrcDesc = memory.at(srcArgIds.front())->getDescPtr(); + m_expectedLayout = firstSrcDesc->hasLayoutType(LayoutType::nspc) ? LayoutType::nspc : LayoutType::ncsp; +} + +bool AclConcatExecutor::supports(const ConcatConfig& config, LayoutType expectedLayout) { + const auto srcArgIds = getSourceArgIds(config.descs); + if (srcArgIds.empty()) { + return false; + } + auto dstIt = config.descs.find(ARG_DST); + if (dstIt == config.descs.end()) { + return false; + } + + std::vector srcDescs; + srcDescs.reserve(srcArgIds.size()); + for (const auto& srcArgId : srcArgIds) { + srcDescs.push_back(config.descs.at(srcArgId)); + } + + return isSupportedCommon(config.attrs, srcDescs, dstIt->second, expectedLayout); +} + +bool AclConcatExecutor::update(const MemoryArgs& memory) { + const auto srcArgIds = getSourceArgIds(memory); + if (srcArgIds.empty()) { + return false; + } + auto dstIt = memory.find(ARG_DST); + if (dstIt == memory.end()) { + return false; + } + + std::vector srcDescs; + srcDescs.reserve(srcArgIds.size()); + for (const auto& srcArgId : srcArgIds) { + srcDescs.push_back(memory.at(srcArgId)->getDescPtr()); + } + + const auto& dstDesc = dstIt->second->getDescPtr(); + if (!isSupportedCommon(m_attrs, srcDescs, dstDesc, m_expectedLayout)) { + return false; + } + + const bool isNspc = m_expectedLayout == LayoutType::nspc; + const auto rank = srcDescs.front()->getShape().getRank(); + const auto precision = srcDescs.front()->getPrecision(); + const auto aclDataType = precisionToAclDataType(precision); + + if (aclDataType == arm_compute::DataType::UNKNOWN) { + return false; + } + + const auto aclLayout = getAclDataLayoutByMemoryDesc(srcDescs.front()); + if (aclLayout == arm_compute::DataLayout::UNKNOWN) { + return false; + } + + const int aclAxis = + axisCast(m_attrs.axis, rank, isNspc ? ACLAxisCastMode::NHWC_TO_NCHW : ACLAxisCastMode::NO_LAYOUT_CONVERSION); + if (aclAxis < 0 || static_cast(aclAxis) >= rank) { + return false; + } + + auto dstShape = shapeCast(memory.at(ARG_DST)->getStaticDims()); + if (isNspc) { + changeLayoutToNH_C({&dstShape}); + } + arm_compute::TensorInfo dstInfo(dstShape, 1, aclDataType, aclLayout); + + std::vector srcInfos; + srcInfos.reserve(srcArgIds.size()); + std::vector srcInfosPtrs; + srcInfosPtrs.reserve(srcArgIds.size()); + for (const auto& srcArgId : srcArgIds) { + auto srcShape = shapeCast(memory.at(srcArgId)->getStaticDims()); + if (isNspc) { + changeLayoutToNH_C({&srcShape}); + } + srcInfos.emplace_back(srcShape, 1, aclDataType, aclLayout); + srcInfosPtrs.push_back(&srcInfos.back()); + } + + auto status = arm_compute::NEConcatenateLayer::validate(srcInfosPtrs, &dstInfo, static_cast(aclAxis)); + if (!status) { + DEBUG_LOG("NEConcatenateLayer validation failed: ", status.error_description()); + return false; + } + + m_srcArgIds = srcArgIds; + m_srcTensors = std::vector(srcArgIds.size()); + m_dstTensor = arm_compute::Tensor(); + for (size_t i = 0; i < srcInfos.size(); ++i) { + m_srcTensors[i].allocator()->init(srcInfos[i]); + } + m_dstTensor.allocator()->init(dstInfo); + + configureThreadSafe([&] { + std::vector srcTensors; + srcTensors.reserve(m_srcTensors.size()); + for (const auto& srcTensor : m_srcTensors) { + srcTensors.push_back(&srcTensor); + } + m_concatLayer.configure(srcTensors, &m_dstTensor, static_cast(aclAxis)); + }); + + return true; +} + +void AclConcatExecutor::execute(const MemoryArgs& memory) { + for (size_t i = 0; i < m_srcArgIds.size(); ++i) { + m_srcTensors[i].allocator()->import_memory(memory.at(m_srcArgIds[i])->getData()); + } + m_dstTensor.allocator()->import_memory(memory.at(ARG_DST)->getData()); + + m_concatLayer.run(); + + for (auto& srcTensor : m_srcTensors) { + srcTensor.allocator()->free(); + } + m_dstTensor.allocator()->free(); +} + +} // namespace ov::intel_cpu diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_concat.hpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_concat.hpp new file mode 100644 index 00000000000000..f033cfe32b2690 --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_concat.hpp @@ -0,0 +1,37 @@ +// Copyright (C) 2018-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "nodes/executors/concat_config.hpp" +#include "nodes/executors/executor.hpp" +#include "nodes/executors/memory_arguments.hpp" + +namespace ov::intel_cpu { + +class AclConcatExecutor : public Executor { +public: + AclConcatExecutor(const ConcatAttrs& attrs, const MemoryArgs& memory, const ExecutorContext::CPtr& context); + + static bool supports(const ConcatConfig& config, LayoutType expectedLayout); + + bool update(const MemoryArgs& memory) override; + void execute(const MemoryArgs& memory) override; + [[nodiscard]] impl_desc_type implType() const override { + return impl_desc_type::acl; + } + +private: + ConcatAttrs m_attrs; + LayoutType m_expectedLayout; + std::vector m_srcArgIds; + std::vector m_srcTensors; + arm_compute::Tensor m_dstTensor; + arm_compute::NEConcatenateLayer m_concatLayer; +}; + +} // namespace ov::intel_cpu diff --git a/src/plugins/intel_cpu/src/nodes/executors/concat.hpp b/src/plugins/intel_cpu/src/nodes/executors/concat.hpp new file mode 100644 index 00000000000000..efca7a198d1e5d --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/executors/concat.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace ov::intel_cpu { + +struct ConcatAttrs { + size_t axis = 0; +}; + +} // namespace ov::intel_cpu diff --git a/src/plugins/intel_cpu/src/nodes/executors/concat_config.hpp b/src/plugins/intel_cpu/src/nodes/executors/concat_config.hpp new file mode 100644 index 00000000000000..d4b04c4e39a9fb --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/executors/concat_config.hpp @@ -0,0 +1,14 @@ +// Copyright (C) 2018-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "nodes/executors/concat.hpp" +#include "nodes/executors/executor_config.hpp" + +namespace ov::intel_cpu { + +using ConcatConfig = executor::Config; + +} // namespace ov::intel_cpu diff --git a/src/plugins/intel_cpu/src/nodes/executors/concat_implementations.cpp b/src/plugins/intel_cpu/src/nodes/executors/concat_implementations.cpp new file mode 100644 index 00000000000000..fba1c75968a8fa --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/executors/concat_implementations.cpp @@ -0,0 +1,85 @@ +// Copyright (C) 2018-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "memory_format_filter.hpp" +#include "nodes/executors/concat.hpp" +#include "nodes/executors/executor.hpp" +#include "nodes/executors/executor_config.hpp" +#include "nodes/executors/executor_implementation.hpp" +#include "nodes/executors/implementation_utils.hpp" +#include "nodes/executors/implementations.hpp" +#include "nodes/executors/memory_arguments.hpp" +#include "utils/arch_macros.h" + +#if defined(OV_CPU_WITH_ACL) +# include "memory_desc/cpu_memory_desc.h" +# include "nodes/executors/acl/acl_concat.hpp" +#endif + +namespace ov::intel_cpu { + +template <> +const std::vector>& getImplementations() { + static const std::vector> concatImplementations{ + // clang-format off + OV_CPU_INSTANCE_ACL( + "concat_acl_ncsp", + ExecutorType::Acl, + OperationType::Concat, + [](const executor::Config& config, + [[maybe_unused]] const MemoryFormatFilter& memoryFormatFilter) -> bool { + return AclConcatExecutor::supports(config, LayoutType::ncsp); + }, + HasNoOptimalConfig{}, + AcceptsAnyShape, + CreateDefault{}) + OV_CPU_INSTANCE_ACL( + "concat_acl_nspc", + ExecutorType::Acl, + OperationType::Concat, + [](const executor::Config& config, + [[maybe_unused]] const MemoryFormatFilter& memoryFormatFilter) -> bool { + return AclConcatExecutor::supports(config, LayoutType::nspc); + }, + HasNoOptimalConfig{}, + AcceptsAnyShape, + CreateDefault{}) + OV_CPU_INSTANCE_COMMON( + "concat_ref_ncsp", + ExecutorType::Reference, + OperationType::Concat, + []([[maybe_unused]] const executor::Config& config, + [[maybe_unused]] const MemoryFormatFilter& memoryFormatFilter) -> bool { + return false; + }, + HasNoOptimalConfig{}, + AcceptsAnyShape, + []([[maybe_unused]] const ConcatAttrs& attrs, + [[maybe_unused]] const MemoryArgs& memory, + [[maybe_unused]] const ExecutorContext::CPtr& context) -> ExecutorPtr { + return nullptr; + }) + OV_CPU_INSTANCE_COMMON( + "concat_ref_nspc", + ExecutorType::Reference, + OperationType::Concat, + []([[maybe_unused]] const executor::Config& config, + [[maybe_unused]] const MemoryFormatFilter& memoryFormatFilter) -> bool { + return false; + }, + HasNoOptimalConfig{}, + AcceptsAnyShape, + []([[maybe_unused]] const ConcatAttrs& attrs, + [[maybe_unused]] const MemoryArgs& memory, + [[maybe_unused]] const ExecutorContext::CPtr& context) -> ExecutorPtr { + return nullptr; + }) + // clang-format on + }; + return concatImplementations; +} + +} // namespace ov::intel_cpu diff --git a/src/plugins/intel_cpu/src/nodes/executors/executor.hpp b/src/plugins/intel_cpu/src/nodes/executors/executor.hpp index 5346ffcf83e757..ac88e50ba3bb82 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/executor.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/executor.hpp @@ -41,7 +41,7 @@ enum class ExecutorType : uint8_t { Kleidiai, }; -enum class OperationType : uint8_t { FullyConnected, MatMul, Convolution, Eltwise }; +enum class OperationType : uint8_t { FullyConnected, MatMul, Convolution, Eltwise, Concat }; std::string ExecutorTypeToString(ExecutorType type); ExecutorType ExecutorTypeFromString(const std::string& typeStr); diff --git a/src/plugins/intel_cpu/src/nodes/executors/executor_factory.hpp b/src/plugins/intel_cpu/src/nodes/executors/executor_factory.hpp index b12b752e5a2c52..547fb627b1ff94 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/executor_factory.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/executor_factory.hpp @@ -30,6 +30,13 @@ class ExecutorFactory { public: using ExecutorImplementationRef = std::reference_wrapper>; + [[nodiscard]] static bool hasSuitableImplementation(const Attrs& attrs, + const MemoryDescArgs& descriptors, + const MemoryFormatFilter& memoryFormatFilter = {}, + const std::string& implementationPriority = {}) { + return !filter(attrs, descriptors, memoryFormatFilter, implementationPriority).empty(); + } + ExecutorFactory(Attrs attrs, ExecutorContext::CPtr context, const MemoryDescArgs& descriptors, diff --git a/src/plugins/intel_cpu/src/nodes/executors/implementations.hpp b/src/plugins/intel_cpu/src/nodes/executors/implementations.hpp index c0ac6e9ac9ccc5..c6a9d8913b9b36 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/implementations.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/implementations.hpp @@ -7,6 +7,7 @@ #include #include +#include "nodes/executors/concat_config.hpp" #include "nodes/executors/convolution_config.hpp" #include "nodes/executors/eltwise_config.hpp" #include "nodes/executors/executor_implementation.hpp" @@ -37,6 +38,10 @@ const std::vector>& getImplementations(); template <> const std::vector>& getImplementations(); +// Concat +template <> +const std::vector>& getImplementations(); + // MatMul template <> const std::vector>& getImplementations();