diff --git a/runtime/onert/backend/cpu/CMakeLists.txt b/runtime/onert/backend/cpu/CMakeLists.txt index 6fbf85edfe9..6744b5c7e13 100644 --- a/runtime/onert/backend/cpu/CMakeLists.txt +++ b/runtime/onert/backend/cpu/CMakeLists.txt @@ -1,7 +1,7 @@ nnfw_find_package(Ruy REQUIRED) file(GLOB SOURCES "*.cc") -list(APPEND SOURCES ops/GGMLHelper.cc ops/OperationUtils.cc) +list(APPEND SOURCES ops/OperationUtils.cc) macro(OP NAME) list(APPEND SOURCES ops/${NAME}Layer.cc) endmacro(OP) @@ -20,8 +20,6 @@ target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_coverage) target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE ruy) target_link_libraries(${LIB_ONERT_BACKEND_CPU} INTERFACE ruy_instrumentation) target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE ndarray) -# Set public: ExternalContext is used in train backend -target_link_libraries(${LIB_ONERT_BACKEND_CPU} PUBLIC ggml) set_target_properties(${LIB_ONERT_BACKEND_CPU} PROPERTIES OUTPUT_NAME backend_cpu diff --git a/runtime/onert/backend/cpu/ExternalContext.cc b/runtime/onert/backend/cpu/ExternalContext.cc index 7df60c0f61e..de55970f801 100644 --- a/runtime/onert/backend/cpu/ExternalContext.cc +++ b/runtime/onert/backend/cpu/ExternalContext.cc @@ -70,11 +70,4 @@ void ExternalContext::setMaxNumThreads(int max_num_threads) _ruy_context->set_max_num_threads(_max_num_threads); } -void ExternalContext::initGgmlContext() -{ - if (_ggml_context == nullptr) - _ggml_context = std::unique_ptr( - ggml_init({.mem_size = 0, .mem_buffer = nullptr, .no_alloc = true}), &ggml_free); -} - } // namespace onert::backend::cpu diff --git a/runtime/onert/backend/cpu/ExternalContext.h b/runtime/onert/backend/cpu/ExternalContext.h index 2f0fa7dcf7c..cdf01e25c6a 100644 --- a/runtime/onert/backend/cpu/ExternalContext.h +++ b/runtime/onert/backend/cpu/ExternalContext.h @@ -19,7 +19,6 @@ #include #include -#include #include @@ -36,14 +35,11 @@ class ExternalContext int32_t maxNumThreads() const { return _max_num_threads; } - void initGgmlContext(); - ruy::Context *ruy_context() const { return _ruy_context.get(); } private: int32_t _max_num_threads; const std::unique_ptr _ruy_context; - std::unique_ptr _ggml_context{nullptr, &ggml_free}; }; } // namespace onert::backend::cpu diff --git a/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc b/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc index 00eddd79202..6898dd4217d 100644 --- a/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc +++ b/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc @@ -16,7 +16,6 @@ #include "FullyConnectedLayer.h" -#include "GGMLHelper.h" #include "../Tensor.h" #include "../KernelGenerator.h" #include "../Validator.h" @@ -28,7 +27,21 @@ namespace onert::backend::cpu { -void Validator::visit(const ir::operation::FullyConnected &) { _supported = true; } +void Validator::visit(const ir::operation::FullyConnected &node) +{ + using ir::operation::FullyConnected; + + const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)}; + const auto weight_node = &_graph.operands().at(weight_index); + + _supported = false; + + if (weight_node->typeInfo().type() == ir::DataType::QUANT_GGML_Q4_0 || + weight_node->typeInfo().type() == ir::DataType::QUANT_GGML_Q8_0) + return; + + _supported = true; +} void KernelGenerator::visit(const ir::operation::FullyConnected &node) { @@ -206,39 +219,6 @@ void FullyConnectedLayer::fullyConnectedSparseWeight() throw std::runtime_error{"FullyConnected: unsupported sparsity"}; } -void FullyConnectedLayer::fullyConnectedGGMLWeight() -{ - if (_bias) - throw std::runtime_error{"FullyConnected: GGML weights format does not support bias yet."}; - - // convert tensor - auto input = getGGMLTensor(_input); - auto weights = getGGMLTensor(_weights); - auto output = getGGMLTensor(_output); - { - output.op = GGML_OP_MUL_MAT; - output.src[0] = &weights; - output.src[1] = &input; - } - auto *nodes = &output; - - // create graph - struct ggml_cgraph graph; - { - memset(&graph, 0, sizeof(graph)); - graph.n_nodes = 1; - graph.nodes = &nodes; - } - - // get cplan - auto cplan = ggml_graph_plan(&graph, _external_context->maxNumThreads()); - std::vector buf(cplan.work_size); - cplan.work_data = buf.data(); - - // compute - ggml_graph_compute(&graph, &cplan); -} - void FullyConnectedLayer::fullyConnected16x1Float32() { #if defined(__aarch64__) && defined(USE_NEON) @@ -279,10 +259,6 @@ void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortabl } #endif _external_context = external_context; - - if (_weights->data_type() == OperandType::QUANT_GGML_Q4_0 || - _weights->data_type() == OperandType::QUANT_GGML_Q8_0) - _external_context->initGgmlContext(); } void FullyConnectedLayer::run() @@ -295,11 +271,6 @@ void FullyConnectedLayer::run() { fullyConnectedSparseWeight(); } - else if (_weights->data_type() == OperandType::QUANT_GGML_Q4_0 || - _weights->data_type() == OperandType::QUANT_GGML_Q8_0) - { - fullyConnectedGGMLWeight(); - } else if (_input->data_type() == OperandType::FLOAT32) { _is_shuffled16x1float32 ? fullyConnected16x1Float32() : fullyConnectedFloat32(); diff --git a/runtime/onert/backend/cpu/ops/GatherLayer.cc b/runtime/onert/backend/cpu/ops/GatherLayer.cc index 2a43fa71147..20f5d673734 100644 --- a/runtime/onert/backend/cpu/ops/GatherLayer.cc +++ b/runtime/onert/backend/cpu/ops/GatherLayer.cc @@ -17,7 +17,6 @@ #include "GatherLayer.h" #include "OperationUtils.h" -#include "GGMLHelper.h" #include "../KernelGenerator.h" #include "../Validator.h" @@ -26,7 +25,20 @@ namespace onert::backend::cpu { -void Validator::visit(const ir::operation::Gather &) { _supported = true; } +void Validator::visit(const ir::operation::Gather &node) +{ + using ir::operation::Gather; + + const auto input_index{node.getInputs().at(Gather::Input::INPUT)}; + const auto input_node = &_graph.operands().at(input_index); + + _supported = false; + + if (input_node->typeInfo().type() == ir::DataType::QUANT_GGML_Q4_0) + return; + + _supported = true; +} void KernelGenerator::visit(const ir::operation::Gather &node) { @@ -43,7 +55,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node) auto fn = std::make_unique(); - fn->configure(input_tensor, indices_tensor, output_tensor, axis, _external_context.get()); + fn->configure(input_tensor, indices_tensor, output_tensor, axis); _return_fn = std::move(fn); } @@ -54,16 +66,12 @@ namespace onert::backend::cpu::ops { void GatherLayer::configure(const IPortableTensor *input, const IPortableTensor *indices, - IPortableTensor *output, int32_t axis, ExternalContext *ctx) + IPortableTensor *output, int32_t axis) { _input = input; _indices = indices; _axis = axis; _output = output; - _ctx = ctx; - - if (_input->data_type() == OperandType::QUANT_GGML_Q4_0) - ctx->initGgmlContext(); } template void GatherLayer::runByInputType() @@ -97,53 +105,6 @@ template void GatherLayer::runByInputType() } } -void GatherLayer::runByGGMLQuantInputType() -{ - // Supporting condition - // Input: rank 2 - // Indice: rank < 4 or rank 4 with dim(0) = 1, INT32 - // Axis: 0 - if (getShape(_input).DimensionsCount() != 2) - throw std::runtime_error("Gather: block quantized input tensor must be rank 2"); - - if (getShape(_indices).DimensionsCount() >= 4 && - (getShape(_indices).DimensionsCount() != 4 || getShape(_indices).Dims(0) != 1)) - throw std::runtime_error("Gather: invalid indices tensor shape"); - - if (_indices->data_type() != ir::DataType::INT32) - throw std::runtime_error("Gather: indices tensor must be int32 type"); - - if (_axis != 0) - throw std::runtime_error("Gather: axis must be 0"); - - // convert tensor - auto input = getGGMLTensor(_input); - auto indices = getGGMLTensor(_indices); - auto output = getGGMLTensor(_output); - { - output.op = GGML_OP_GET_ROWS; - output.src[0] = &input; - output.src[1] = &indices; - } - auto *nodes = &output; - - // create graph - struct ggml_cgraph graph; - { - memset(&graph, 0, sizeof(graph)); - graph.n_nodes = 1; - graph.nodes = &nodes; - } - - // get cplan - auto cplan = ggml_graph_plan(&graph, _ctx->maxNumThreads()); - std::vector buf(cplan.work_size); - cplan.work_data = buf.data(); - - // compute - ggml_graph_compute(&graph, &cplan); -} - void GatherLayer::run() { switch (_input->data_type()) @@ -157,9 +118,6 @@ void GatherLayer::run() case OperandType::INT32: runByInputType(); break; - case OperandType::QUANT_GGML_Q4_0: - runByGGMLQuantInputType(); - break; case OperandType::BOOL8: runByInputType(); break; diff --git a/runtime/onert/backend/cpu/ops/GatherLayer.h b/runtime/onert/backend/cpu/ops/GatherLayer.h index 0761b0a8b5c..dcb7dc2bca6 100644 --- a/runtime/onert/backend/cpu/ops/GatherLayer.h +++ b/runtime/onert/backend/cpu/ops/GatherLayer.h @@ -17,8 +17,6 @@ #ifndef __ONERT_BACKEND_CPU_OPS_GATHERLAYER_H__ #define __ONERT_BACKEND_CPU_OPS_GATHERLAYER_H__ -#include "../ExternalContext.h" - #include #include @@ -29,20 +27,19 @@ namespace onert::backend::cpu::ops class GatherLayer : public ::onert::exec::IFunction { public: - GatherLayer() : _input{nullptr}, _indices{nullptr}, _output{nullptr}, _axis{-1}, _ctx{nullptr} + GatherLayer() : _input{nullptr}, _indices{nullptr}, _output{nullptr}, _axis{-1} { // DO NOTHING } public: void configure(const IPortableTensor *input, const IPortableTensor *indices, - IPortableTensor *output, int32_t axis, ExternalContext *ctx); + IPortableTensor *output, int32_t axis); void run() override; private: template void runByInputType(); - void runByGGMLQuantInputType(); private: const IPortableTensor *_input; @@ -50,7 +47,6 @@ class GatherLayer : public ::onert::exec::IFunction IPortableTensor *_output; int32_t _axis; - ExternalContext *_ctx; }; } // namespace onert::backend::cpu::ops diff --git a/runtime/onert/backend/ggml/CMakeLists.txt b/runtime/onert/backend/ggml/CMakeLists.txt index dab1412cb94..738490e123c 100644 --- a/runtime/onert/backend/ggml/CMakeLists.txt +++ b/runtime/onert/backend/ggml/CMakeLists.txt @@ -1,6 +1,7 @@ set(LIB_ONERT_BACKEND_GGML onert_backend_ggml) file(GLOB SOURCES "*.cc") +list(APPEND SOURCES ops/GGMLHelper.cc) macro(OP NAME) list(APPEND SOURCES ops/${NAME}Layer.cc) endmacro(OP) diff --git a/runtime/onert/backend/ggml/Operation.lst b/runtime/onert/backend/ggml/Operation.lst index e69de29bb2d..b0c2fd22507 100644 --- a/runtime/onert/backend/ggml/Operation.lst +++ b/runtime/onert/backend/ggml/Operation.lst @@ -0,0 +1,2 @@ +OP(FullyConnected) +OP(Gather) diff --git a/runtime/onert/backend/ggml/ops/FullyConnectedLayer.cc b/runtime/onert/backend/ggml/ops/FullyConnectedLayer.cc new file mode 100644 index 00000000000..656e908a84c --- /dev/null +++ b/runtime/onert/backend/ggml/ops/FullyConnectedLayer.cc @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "FullyConnectedLayer.h" + +#include "GGMLHelper.h" +#include "../KernelGenerator.h" +#include "../Validator.h" + +namespace onert::backend::ggml +{ + +void Validator::visit(const ir::operation::FullyConnected &node) +{ + using ir::operation::FullyConnected; + + const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)}; + const auto weight_node = &_graph.operands().at(weight_index); + + _supported = false; + + if (weight_node->typeInfo().type() != ir::DataType::QUANT_GGML_Q4_0 && + weight_node->typeInfo().type() != ir::DataType::QUANT_GGML_Q8_0) + return; + + if (node.param().activation != ir::Activation::NONE) + return; + + _supported = true; +} + +void KernelGenerator::visit(const ir::operation::FullyConnected &node) +{ + using ir::operation::FullyConnected; + + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)}; + const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)}; + const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)}; + const auto activation = node.param().activation; + const auto weights_format = node.param().weights_format; + if (weights_format != ir::FullyConnectedWeightsFormat::Default) + throw std::runtime_error("Unsupported FullyConnected Weights Format"); + + auto output_tensor = _tensor_reg->getPortableTensor(output_index); + auto input_tensor = _tensor_reg->getPortableTensor(input_index); + auto weight_tensor = _tensor_reg->getPortableTensor(weight_index); + auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index); + + auto fn = std::make_unique(); + + fn->configure(input_tensor, weight_tensor, bias_tensor, activation, output_tensor, + _external_context); + + _return_fn = std::move(fn); +} + +} // namespace onert::backend::ggml + +namespace onert::backend::ggml::ops +{ + +FullyConnectedLayer::FullyConnectedLayer() + : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr), + _activation(ir::Activation::NONE), _external_context(nullptr) +{ + // DO NOTHING +} + +FullyConnectedLayer::~FullyConnectedLayer() = default; + +void FullyConnectedLayer::fullyConnectedGGMLWeight() +{ + if (_bias) + throw std::runtime_error{"FullyConnected: GGML weights format does not support bias yet."}; + + // convert tensor + auto input = getGGMLTensor(_input); + auto weights = getGGMLTensor(_weights); + auto output = getGGMLTensor(_output); + { + output.op = GGML_OP_MUL_MAT; + output.src[0] = &weights; + output.src[1] = &input; + } + auto *nodes = &output; + + // create graph + struct ggml_cgraph graph; + { + memset(&graph, 0, sizeof(graph)); + graph.n_nodes = 1; + graph.nodes = &nodes; + } + + // get cplan + auto cplan = ggml_graph_plan(&graph, _external_context->maxNumThreads()); + std::vector buf(cplan.work_size); + cplan.work_data = buf.data(); + + // compute + ggml_graph_compute(&graph, &cplan); +} + +void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights, + const IPortableTensor *bias, ir::Activation activation, + IPortableTensor *output, + const std::shared_ptr &external_context) +{ + _input = input; + _weights = weights; + _bias = bias; + _activation = activation; + _output = output; + _external_context = external_context; +} + +void FullyConnectedLayer::run() +{ + if (_weights->data_type() == ir::DataType::QUANT_GGML_Q4_0 || + _weights->data_type() == ir::DataType::QUANT_GGML_Q8_0) + { + fullyConnectedGGMLWeight(); + } + else + { + throw std::runtime_error{"FullyConnected: unsupported data type"}; + } +} + +void FullyConnectedLayer::prepare() +{ + // DO NOTHING +} + +} // namespace onert::backend::ggml::ops diff --git a/runtime/onert/backend/ggml/ops/FullyConnectedLayer.h b/runtime/onert/backend/ggml/ops/FullyConnectedLayer.h new file mode 100644 index 00000000000..18e12aaeb07 --- /dev/null +++ b/runtime/onert/backend/ggml/ops/FullyConnectedLayer.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_GGML_OPS_FULLYCONNECTEDLAYER_H__ +#define __ONERT_BACKEND_GGML_OPS_FULLYCONNECTEDLAYER_H__ + +#include "../ExternalContext.h" + +#include +#include +#include + +namespace onert::backend::ggml::ops +{ + +class FullyConnectedLayer : public ::onert::exec::IFunction +{ +public: + FullyConnectedLayer(); + ~FullyConnectedLayer(); + +public: + void fullyConnectedGGMLWeight(); + + void configure(const IPortableTensor *input, const IPortableTensor *weights, + const IPortableTensor *bias, ir::Activation activation, IPortableTensor *output, + const std::shared_ptr &external_context); + + void run() override; + + void prepare() override; + +protected: + const IPortableTensor *_input; + const IPortableTensor *_weights; + const IPortableTensor *_bias; + IPortableTensor *_output; + ir::Activation _activation; + + std::shared_ptr _external_context; +}; + +} // namespace onert::backend::ggml::ops + +#endif // __ONERT_BACKEND_GGML_OPS_FULLYCONNECTEDLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/GGMLHelper.cc b/runtime/onert/backend/ggml/ops/GGMLHelper.cc similarity index 95% rename from runtime/onert/backend/cpu/ops/GGMLHelper.cc rename to runtime/onert/backend/ggml/ops/GGMLHelper.cc index 8b66f024171..459baa8c2d3 100644 --- a/runtime/onert/backend/cpu/ops/GGMLHelper.cc +++ b/runtime/onert/backend/ggml/ops/GGMLHelper.cc @@ -16,7 +16,7 @@ #include "GGMLHelper.h" -namespace onert::backend::cpu::ops +namespace onert::backend::ggml::ops { ggml_type getGGMLType(ir::DataType type) @@ -64,4 +64,4 @@ struct ggml_tensor getGGMLTensor(const IPortableTensor *tensor) return res; } -} // namespace onert::backend::cpu::ops +} // namespace onert::backend::ggml::ops diff --git a/runtime/onert/backend/cpu/ops/GGMLHelper.h b/runtime/onert/backend/ggml/ops/GGMLHelper.h similarity index 82% rename from runtime/onert/backend/cpu/ops/GGMLHelper.h rename to runtime/onert/backend/ggml/ops/GGMLHelper.h index d692dc23d7d..1e55cce84ea 100644 --- a/runtime/onert/backend/cpu/ops/GGMLHelper.h +++ b/runtime/onert/backend/ggml/ops/GGMLHelper.h @@ -14,18 +14,18 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_CPU_GGML_HELPER_H__ -#define __ONERT_BACKEND_CPU_GGML_HELPER_H__ +#ifndef __ONERT_BACKEND_GGML_GGML_HELPER_H__ +#define __ONERT_BACKEND_GGML_GGML_HELPER_H__ #include #include -namespace onert::backend::cpu::ops +namespace onert::backend::ggml::ops { struct ggml_tensor getGGMLTensor(const IPortableTensor *tensor); -} // namespace onert::backend::cpu::ops +} // namespace onert::backend::ggml::ops #endif diff --git a/runtime/onert/backend/ggml/ops/GatherLayer.cc b/runtime/onert/backend/ggml/ops/GatherLayer.cc new file mode 100644 index 00000000000..25446d78a12 --- /dev/null +++ b/runtime/onert/backend/ggml/ops/GatherLayer.cc @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "GatherLayer.h" + +#include "GGMLHelper.h" +#include "OperationUtils.h" +#include "../KernelGenerator.h" +#include "../Validator.h" + +namespace onert::backend::ggml +{ + +void Validator::visit(const ir::operation::Gather &node) +{ + using ir::operation::Gather; + + const auto input_index{node.getInputs().at(Gather::Input::INPUT)}; + const auto input_node = &_graph.operands().at(input_index); + + _supported = false; + + if (input_node->typeInfo().type() != ir::DataType::QUANT_GGML_Q4_0) + return; + + _supported = true; +} + +void KernelGenerator::visit(const ir::operation::Gather &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)}; + const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)}; + + auto output_tensor = _tensor_reg->getPortableTensor(output_index); + auto input_tensor = _tensor_reg->getPortableTensor(input_index); + auto indices_tensor = _tensor_reg->getPortableTensor(indices_index); + + const auto rank = _ctx.at(input_index).shape().rank(); + const auto axis = ops::getAxis(rank, node.param().axis); + + auto fn = std::make_unique(); + + fn->configure(input_tensor, indices_tensor, output_tensor, axis, _external_context.get()); + + _return_fn = std::move(fn); +} + +} // namespace onert::backend::ggml + +namespace onert::backend::ggml::ops +{ + +void GatherLayer::configure(const IPortableTensor *input, const IPortableTensor *indices, + IPortableTensor *output, int32_t axis, ExternalContext *ctx) +{ + _input = input; + _indices = indices; + _axis = axis; + _output = output; + _ctx = ctx; +} + +void GatherLayer::runByGGMLQuantInputType() +{ + // Supporting condition + // Input: rank 2 + // Indice: rank < 4 or rank 4 with dim(0) = 1, INT32 + // Axis: 0 + if (_input->getShape().rank() != 2) + throw std::runtime_error("Gather: block quantized input tensor must be rank 2"); + + if (_indices->getShape().rank() >= 4 && + (_indices->getShape().rank() != 4 || _indices->getShape().dim(0) != 1)) + throw std::runtime_error("Gather: invalid indices tensor shape"); + + if (_indices->data_type() != ir::DataType::INT32) + throw std::runtime_error("Gather: indices tensor must be int32 type"); + + if (_axis != 0) + throw std::runtime_error("Gather: axis must be 0"); + + // convert tensor + auto input = getGGMLTensor(_input); + auto indices = getGGMLTensor(_indices); + auto output = getGGMLTensor(_output); + { + output.op = GGML_OP_GET_ROWS; + output.src[0] = &input; + output.src[1] = &indices; + } + auto *nodes = &output; + + // create graph + struct ggml_cgraph graph; + { + memset(&graph, 0, sizeof(graph)); + graph.n_nodes = 1; + graph.nodes = &nodes; + } + + // get cplan + auto cplan = ggml_graph_plan(&graph, _ctx->maxNumThreads()); + std::vector buf(cplan.work_size); + cplan.work_data = buf.data(); + + // compute + ggml_graph_compute(&graph, &cplan); +} + +void GatherLayer::run() +{ + switch (_input->data_type()) + { + case ir::DataType::QUANT_GGML_Q4_0: + runByGGMLQuantInputType(); + break; + default: + throw std::runtime_error("Gather: unsupported input data type"); + } +} + +} // namespace onert::backend::ggml::ops diff --git a/runtime/onert/backend/ggml/ops/GatherLayer.h b/runtime/onert/backend/ggml/ops/GatherLayer.h new file mode 100644 index 00000000000..eb5fcd81492 --- /dev/null +++ b/runtime/onert/backend/ggml/ops/GatherLayer.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_GGML_OPS_GATHERLAYER_H__ +#define __ONERT_BACKEND_GGML_OPS_GATHERLAYER_H__ + +#include "../ExternalContext.h" + +#include + +#include + +namespace onert::backend::ggml::ops +{ + +class GatherLayer : public ::onert::exec::IFunction +{ +public: + GatherLayer() : _input{nullptr}, _indices{nullptr}, _output{nullptr}, _axis{-1}, _ctx{nullptr} + { + // DO NOTHING + } + +public: + void configure(const IPortableTensor *input, const IPortableTensor *indices, + IPortableTensor *output, int32_t axis, ExternalContext *ctx); + + void run() override; + +private: + template void runByInputType(); + void runByGGMLQuantInputType(); + +private: + const IPortableTensor *_input; + const IPortableTensor *_indices; + IPortableTensor *_output; + + int32_t _axis; + ExternalContext *_ctx; +}; + +} // namespace onert::backend::ggml::ops + +#endif // __ONERT_BACKEND_GGML_OPS_GATHERLAYER_H__ diff --git a/runtime/onert/backend/ggml/ops/OperationUtils.h b/runtime/onert/backend/ggml/ops/OperationUtils.h new file mode 100644 index 00000000000..96a953f29e2 --- /dev/null +++ b/runtime/onert/backend/ggml/ops/OperationUtils.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_GGML_OPS_OPERATION_UTILS_H__ +#define __ONERT_BACKEND_GGML_OPS_OPERATION_UTILS_H__ + +#include + +namespace onert::backend::ggml::ops +{ + +inline int32_t getAxis(uint32_t rank, int32_t axis) +{ + auto ret = axis; + + if (axis < 0) + { + ret += rank; + } + + return ret; +} + +} // namespace onert::backend::ggml::ops + +#endif // __ONERT_BACKEND_GGML_OPS_OPERATION_UTILS_H__ diff --git a/runtime/tests/nnfw_api/src/GenModelTests/one_op_tests/Gather.test.cc b/runtime/tests/nnfw_api/src/GenModelTests/one_op_tests/Gather.test.cc index ac6dbd345f7..0868aa51cd6 100644 --- a/runtime/tests/nnfw_api/src/GenModelTests/one_op_tests/Gather.test.cc +++ b/runtime/tests/nnfw_api/src/GenModelTests/one_op_tests/Gather.test.cc @@ -74,7 +74,7 @@ TEST_F(GenModelTest, OneOp_Gather_Q4_0) tc.addInput({2}); tc.addOutput(std::vector{params.begin() + 64, params.begin() + 96}); _context->addTestCase(tc); - _context->setBackends({"cpu"}); + _context->setBackends({"ggml"}); SUCCEED(); } @@ -95,7 +95,7 @@ TEST_F(GenModelTest, neg_OneOp_Gather_Q4_0_InvalidOutType) cgen.setInputsAndOutputs({indice}, {output}); _context = std::make_unique(cgen.finish()); - _context->setBackends({"cpu"}); + _context->setBackends({"ggml"}); _context->expectFailModelLoad(); SUCCEED(); @@ -115,7 +115,7 @@ TEST_F(GenModelTest, neg_OneOp_Gather_Q4_0_shape) cgen.setInputsAndOutputs({indice}, {output}); _context = std::make_unique(cgen.finish()); - _context->setBackends({"cpu"}); + _context->setBackends({"ggml"}); _context->expectFailCompile(); SUCCEED();