diff --git a/runtime/onert/backend/acl_common/AclKernelGen.h b/runtime/onert/backend/acl_common/AclKernelGen.h
index f03209e242e..14ad5a27342 100644
--- a/runtime/onert/backend/acl_common/AclKernelGen.h
+++ b/runtime/onert/backend/acl_common/AclKernelGen.h
@@ -21,12 +21,15 @@
 #include "IACLTensor.h"
 
 #include <exec/IFunction.h>
+#include <exec/NopFunction.h>
 #include <ir/Operands.h>
 #include <ir/operation/FullyConnected.h>
 #include <ir/operation/LSTM.h>
 
 #include <arm_compute/runtime/CL/CLFunctions.h>
 #include <arm_compute/runtime/CL/CLFunctionsEx.h>
+#include <arm_compute/runtime/NEON/NEFunctions.h>
+#include <arm_compute/runtime/NEON/NEFunctionsEx.h>
 
 namespace onert::backend::acl_common
 {
diff --git a/runtime/onert/backend/acl_neon/CMakeLists.txt b/runtime/onert/backend/acl_neon/CMakeLists.txt
index 7d317ddc821..21283e2937b 100644
--- a/runtime/onert/backend/acl_neon/CMakeLists.txt
+++ b/runtime/onert/backend/acl_neon/CMakeLists.txt
@@ -10,8 +10,7 @@ file(GLOB SOURCES "*.cc")
 file(GLOB OPERAND_SOURCES "operand/*.cc")
 list(APPEND SOURCES ${OPERAND_SOURCES})
 macro(OP NAME)
-  # ACL_NEON backend doesn't have separate operation files,
-  # but we include the macro for consistency with other backend pattern
+  list(APPEND SOURCES ops/${NAME}.cc)
 endmacro(OP)
 include(Operation.lst)
 
diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.cc b/runtime/onert/backend/acl_neon/KernelGenerator.cc
index 2512f0c8e81..5e21b201847 100644
--- a/runtime/onert/backend/acl_neon/KernelGenerator.cc
+++ b/runtime/onert/backend/acl_neon/KernelGenerator.cc
@@ -58,1310 +58,4 @@ std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationI
   return ret;
 }
 
-void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
-  const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
-
-  const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
-
-  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
-  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
-
-  int axis_value = _ctx.at(axis_index).asScalar<int32_t>();
-  if (axis_value < 0)
-  {
-    axis_value += ifm_rank;
-  }
-  assert(axis_value >= 0 && axis_value < ifm_rank);
-  const auto fixed_axis = acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
-  auto reduce_type = node.param().is_arg_max ? ::arm_compute::ReductionOperation::ARG_IDX_MAX
-                                             : ::arm_compute::ReductionOperation::ARG_IDX_MIN;
-
-  auto fn = acl_common::generateLayer<arm_compute::NEArgMinMaxLayer>(
-    ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), reduce_type);
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
-  const auto block_size_index{
-    node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
-
-  const auto NNApiInputs = 2;
-  if (node.getInputs().size() != NNApiInputs)
-  {
-    const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)};
-    if (!_ctx.at(crops_index).isConstant())
-    {
-      throw std::runtime_error("Non-constant crops NYI for acl_neon backend BatchToSpaceND");
-    }
-
-    auto crops = _ctx.at(crops_index).asVector<int32_t>();
-    for (auto &&crop : crops)
-    {
-      if (crop != 0)
-      {
-        throw std::runtime_error("Non-zero crops NYI for acl_neon backend BatchToSpaceND");
-      }
-    }
-  }
-
-  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
-  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
-
-  if (!_ctx.at(block_size_index).data())
-    throw std::runtime_error("ACL NEON does not support dynamic block size for BatchToSpaceND");
-
-  auto block = _ctx.at(block_size_index).asVector<int32_t>();
-  int32_t height = block[0];
-  int32_t width = block[1];
-
-  auto fn = acl_common::generateLayer<arm_compute::NEBatchToSpaceLayer>(
-    ifm_tensor->handle(), width, height, ofm_tensor->handle());
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
-  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
-  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
-
-  std::unique_ptr<arm_compute::IFunction> fn;
-  switch (node.param().arithmetic_type)
-  {
-    case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
-    {
-      arm_compute::NEArithmeticAddition::validate(lhs_tensor->info(), rhs_tensor->info(),
-                                                  ofm_tensor->info(),
-                                                  arm_compute::ConvertPolicy::SATURATE)
-        .throw_if_error();
-      fn = acl_common::generateLayer<arm_compute::NEArithmeticAddition>(
-        lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
-        arm_compute::ConvertPolicy::SATURATE);
-      break;
-    }
-    case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
-    {
-      arm_compute::NEArithmeticSubtraction::validate(lhs_tensor->info(), rhs_tensor->info(),
-                                                     ofm_tensor->info(),
-                                                     arm_compute::ConvertPolicy::SATURATE)
-        .throw_if_error();
-      fn = acl_common::generateLayer<arm_compute::NEArithmeticSubtraction>(
-        lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
-        arm_compute::ConvertPolicy::SATURATE);
-      break;
-    }
-    case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
-    {
-      arm_compute::NEPixelWiseMultiplication::validate(
-        lhs_tensor->info(), rhs_tensor->info(), ofm_tensor->info(), 1.0,
-        arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO)
-        .throw_if_error();
-      // RoundingPolicy for scale:1.0 is only allowed RoundingPolicy::TO_ZERO
-      fn = acl_common::generateLayer<arm_compute::NEPixelWiseMultiplication>(
-        lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
-        arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
-      break;
-    }
-    case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
-    {
-      arm_compute::NEElementwiseDivision::validate(lhs_tensor->info(), rhs_tensor->info(),
-                                                   ofm_tensor->info())
-        .throw_if_error();
-      fn = acl_common::generateLayer<arm_compute::NEElementwiseDivision>(
-        lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-      break;
-    }
-    default:
-      assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations");
-      break;
-  }
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-    asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Conv2D &node)
-{
-  using ir::operation::Conv2D;
-
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
-  const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
-  const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
-
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
-  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
-  const auto &ker_shape = _ctx.at(ker_index).shape();
-  const auto ker_height = ker_shape.dim(1);
-  const auto ker_width = ker_shape.dim(2);
-
-  const auto stride = node.param().stride;
-  const auto padding =
-    ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
-  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
-  auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
-  auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
-
-  const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
-  const auto act_info = acl_common::asActivationLayerInfo(activation);
-
-  auto fn = acl_common::generateLayer<arm_compute::NEConvolutionLayer>(
-    _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
-    ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
-    ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
-
-  auto block_size = node.param().block_size;
-  assert(block_size > 0);
-
-  auto output_tensor = _tensor_reg->getAclTensor(output_index);
-  auto input_tensor = _tensor_reg->getAclTensor(input_index);
-
-  auto fn = acl_common::generateLayer<arm_compute::NEDepthToSpaceLayer>(
-    input_tensor->handle(), output_tensor->handle(), block_size);
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
-{
-  using ir::operation::DepthwiseConv2D;
-
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
-  const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
-  const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
-
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
-  // Kernel format is [1, kernel_height, kernel_width, depth_out].
-  const auto &ker_shape = _ctx.at(ker_index).shape();
-  const auto ker_height = ker_shape.dim(1);
-  const auto ker_width = ker_shape.dim(2);
-
-  const auto stride = node.param().stride;
-  const auto dilation = node.param().dilation;
-  const auto padding =
-    ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
-                         dilation.width_factor, dilation.height_factor);
-  const auto multiplier = node.param().multiplier;
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
-  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
-  auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
-  auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
-
-  const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
-  const auto act_info = acl_common::asActivationLayerInfo(activation);
-  const auto dilation_info = acl_common::asDilation(dilation.width_factor, dilation.height_factor);
-
-  auto fn = acl_common::generateLayer<arm_compute::NEDepthwiseConvolutionLayer>(
-    ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
-    conv_info, multiplier, act_info, dilation_info);
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Concat &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-
-  std::vector<ir::OperandIndex> input_indexes;
-  for (const auto &input : node.getInputs())
-    input_indexes.emplace_back(input);
-
-  const auto axis = node.param().axis;
-
-  // Concat elimination check
-  bool eliminated = _tensor_builder->areSubTensorsOf(ofm_index, node.getInputs());
-  if (eliminated)
-  {
-    // If concat eliminated, return a NOP IFunction
-    VERBOSE(acl_neon_KernelGenerator_Concat) << "Concat eliminated" << std::endl;
-    _return_fn = std::make_unique<exec::NopFunction>();
-    return;
-  }
-
-  auto output_tensor = _tensor_reg->getAclTensor(ofm_index);
-  std::vector<const ::arm_compute::ITensor *> input_tensors;
-  for (const auto &ifm_ind : input_indexes)
-    input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
-
-  std::unique_ptr<::arm_compute::IFunction> fn;
-  if (input_indexes.size() < 2)
-  {
-    ::arm_compute::ITensor *input_tesor = _tensor_reg->getAclTensor(input_indexes.at(0))->handle();
-    fn = acl_common::generateLayer<arm_compute::NECopy>(input_tesor, output_tensor->handle());
-  }
-  else
-  {
-    const auto rank = _ctx.at(ofm_index).shape().rank();
-    const auto fixed_axis = acl_common::ToARMComputeAxis(rank, axis).value();
-    fn = acl_common::generateLayer<arm_compute::NEConcatenateLayer>(
-      input_tensors, output_tensor->handle(), fixed_axis);
-  }
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
-  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
-
-  const ::arm_compute::ActivationLayerInfo act_info =
-    acl_common::asActivationLayerInfo(node.param().op_type, node.param().alpha, node.param().beta);
-
-  std::unique_ptr<arm_compute::IFunction> fn =
-    acl_common::generateLayer<arm_compute::NEActivationLayer>(ifm_tensor->handle(),
-                                                              ofm_tensor->handle(), act_info);
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
-
-  auto output_tensor = _tensor_reg->getAclTensor(output_index);
-  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
-  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
-
-  std::unique_ptr<arm_compute::IFunction> fn;
-  switch (node.param().op_type)
-  {
-    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
-    {
-      fn = acl_common::generateLayer<arm_compute::NELogicalAnd>(
-        lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
-      break;
-    }
-    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
-    {
-      fn = acl_common::generateLayer<arm_compute::NELogicalOr>(
-        lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
-      break;
-    }
-    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
-    {
-      fn = acl_common::generateLayer<arm_compute::NEElementwiseMax>(
-        lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
-      break;
-    }
-    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
-    {
-      fn = acl_common::generateLayer<arm_compute::NEElementwiseMin>(
-        lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
-      break;
-    }
-    default:
-    {
-      std::string err_msg("acl_neon KernelGenerator : " + node.name() +
-                          "is not elementwise-binary operations");
-      assert(false && err_msg.c_str());
-      break;
-    }
-  }
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
-
-  auto output_tensor = _tensor_reg->getAclTensor(output_index);
-  auto input_tensor = _tensor_reg->getAclTensor(input_index);
-
-  std::unique_ptr<arm_compute::IFunction> fn;
-  switch (node.param().op_type)
-  {
-    case ir::operation::ElementwiseUnary::Type::ABS:
-    {
-      const ::arm_compute::ActivationLayerInfo act_info{
-        ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
-
-      fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
-        input_tensor->handle(), output_tensor->handle(), act_info);
-      break;
-    }
-    case ir::operation::ElementwiseUnary::Type::CAST:
-    {
-      if (input_tensor->data_type() == output_tensor->data_type())
-      {
-        fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensor->handle(),
-                                                            output_tensor->handle());
-      }
-      else if (_ctx.at(input_index).typeInfo().type() == ir::DataType::BOOL8)
-      {
-        fn = acl_common::generateLayer<arm_compute::NECastBool>(input_tensor->handle(),
-                                                                output_tensor->handle());
-      }
-      else
-      {
-        fn = acl_common::generateLayer<arm_compute::NECast>(
-          input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
-      }
-      break;
-    }
-    case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
-    {
-      fn = acl_common::generateLayer<arm_compute::NEDequantizationLayer>(input_tensor->handle(),
-                                                                         output_tensor->handle());
-      break;
-    }
-    case ir::operation::ElementwiseUnary::Type::EXP:
-    {
-      fn = acl_common::generateLayer<arm_compute::NEExpLayer>(input_tensor->handle(),
-                                                              output_tensor->handle());
-      break;
-    }
-    case ir::operation::ElementwiseUnary::Type::FLOOR:
-    {
-      fn = acl_common::generateLayer<arm_compute::NEFloor>(input_tensor->handle(),
-                                                           output_tensor->handle());
-      break;
-    }
-    case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
-    {
-      fn = acl_common::generateLayer<arm_compute::NEBitwiseNot>(input_tensor->handle(),
-                                                                output_tensor->handle());
-      break;
-    }
-    case ir::operation::ElementwiseUnary::Type::NEG:
-    {
-      fn = acl_common::generateLayer<arm_compute::NENegLayer>(input_tensor->handle(),
-                                                              output_tensor->handle());
-      break;
-    }
-    case ir::operation::ElementwiseUnary::Type::RSQRT:
-    {
-      fn = acl_common::generateLayer<arm_compute::NERsqrtLayer>(input_tensor->handle(),
-                                                                output_tensor->handle());
-      break;
-    }
-    case ir::operation::ElementwiseUnary::Type::SQRT:
-    {
-      const ::arm_compute::ActivationLayerInfo act_info{
-        ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
-
-      fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
-        input_tensor->handle(), output_tensor->handle(), act_info);
-      break;
-    }
-    default:
-    {
-      throw std::runtime_error("acl_neon KernelGenerator : " + node.name() +
-                               "is not supported yet");
-      break;
-    }
-  }
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
-  const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
-
-  auto output_tensor = _tensor_reg->getAclTensor(output_index);
-  auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index);
-  auto values_tensor = _tensor_reg->getAclTensor(values_index);
-
-  size_t n = _ctx.at(values_index).shape().rank();
-  assert(n == values_tensor->num_dimensions());
-  size_t k = _ctx.at(lookups_index).shape().rank();
-  assert(k == lookups_tensor->num_dimensions());
-
-  const int axis = ::onert::backend::acl_common::ToARMComputeAxis(n, 0).value();
-
-  // Disable applied dim_correction
-  if (n != values_tensor->info()->num_dimensions())
-  {
-    // This means that high dimension's value is 1 and ifm tensor is applied dim_correction
-    acl_common::disableDimCorrection(values_tensor);
-  }
-  if (k != lookups_tensor->info()->num_dimensions())
-  {
-    // This means that high dimension's value is 1 and indices tensor is applied dim_correction
-    acl_common::disableDimCorrection(lookups_tensor);
-  }
-
-  auto fn = acl_common::generateLayer<arm_compute::NEGather>(
-    values_tensor->handle(), lookups_tensor->handle(), output_tensor->handle(), axis);
-
-  // Revert disabling applied dim_correction
-  if (values_tensor->dimension(0) == 1)
-  {
-    acl_common::enableDimCorrection(values_tensor);
-  }
-  if (lookups_tensor->dimension(0) == 1)
-  {
-    acl_common::enableDimCorrection(lookups_tensor);
-  }
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::FullyConnected &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  auto output_tensor = _tensor_reg->getAclTensor(output_index);
-  const auto activation = node.param().activation;
-  if (node.param().weights_format == ir::FullyConnectedWeightsFormat::Shuffled16x1Float32)
-    throw std::runtime_error(
-      "KernelGenerator(acl_neon): FullyConnected 16x1Float32 weights is not supported.");
-
-  auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor,
-                                                ::arm_compute::NEFullyConnectedReshapingLayer>(
-    node, _ctx, _tensor_builder, _tensor_reg);
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-    std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
-{
-  const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
-  const auto hits_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::HITS)};
-
-  const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)};
-  const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
-  const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
-
-  auto output_tensor = _tensor_reg->getAclTensor(output_index);
-  auto hits_tensor = _tensor_reg->getAclTensor(hits_index);
-
-  auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index);
-  auto keys_tensor = _tensor_reg->getAclTensor(keys_index);
-  auto values_tensor = _tensor_reg->getAclTensor(values_index);
-
-  auto fn = acl_common::generateLayer<arm_compute::NEHashtableLookup>(
-    lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
-    output_tensor->handle(), hits_tensor->handle());
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Gather &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-
-  const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
-  const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
-
-  const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
-  const auto axis_raw = node.param().axis;
-  const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw);
-  // Converting in reverse order
-  const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
-
-  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
-  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
-  auto indices_tensor = _tensor_reg->getAclTensor(indices_index);
-
-  // input is n-D, indices k-D, output is (n + k - 1)-D
-  size_t n = ifm_rank;
-  assert(n == ifm_tensor->num_dimensions());
-  size_t k = _ctx.at(indices_index).shape().rank();
-  assert(k == indices_tensor->num_dimensions());
-
-  // Disable applied dim_correction
-  if (n != ifm_tensor->info()->num_dimensions())
-  {
-    // This means that high dimension's value is 1 and ifm tensor is applied dim_correction
-    acl_common::disableDimCorrection(ifm_tensor);
-  }
-  if (k != indices_tensor->info()->num_dimensions())
-  {
-    // This means that high dimension's value is 1 and indices tensor is applied dim_correction
-    acl_common::disableDimCorrection(indices_tensor);
-  }
-
-  auto fn = acl_common::generateLayer<arm_compute::NEGather>(
-    ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
-
-  // Revert disabling applied dim_correction
-  if (ifm_tensor->dimension(0) == 1)
-  {
-    acl_common::enableDimCorrection(ifm_tensor);
-  }
-  if (indices_tensor->dimension(0) == 1)
-  {
-    acl_common::enableDimCorrection(indices_tensor);
-  }
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
-  const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
-  const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
-
-  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
-  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
-  auto epsilon = node.param().epsilon;
-  auto activation = node.param().activation;
-
-  if (!_ctx.at(gamma_index).isConstant() || !_ctx.at(beta_index).isConstant())
-    throw std::runtime_error{"Non-constant gamma or beta for acl_neon backend InstanceNorm"};
-
-  auto gamma = _ctx.at(gamma_index).asScalar<float>();
-  auto beta = _ctx.at(beta_index).asScalar<float>();
-  auto fn = acl_common::generateLayer<arm_compute::NEInstanceNormalizationLayer>(
-    ifm_tensor->handle(), ofm_tensor->handle(), gamma, beta, epsilon);
-
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-    asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::L2Normalization &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};
-
-  // {CL|Neon}L2Normalization performs the reduction only along dimension 0
-  // L2 Normalization always performs the reduction along the depth axis
-  // Thus, we repurpose {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by
-  // choosing normalization parameters as below
-
-  const auto &ifm_shape = _ctx.at(ifm_index).shape();
-  // TODO Support optional constant dimension that normalization would be performed on
-  const auto normalization_axis = _ctx.at(ifm_index).shape().rank() - 1;
-  int32_t radius =
-    2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1
-  float alpha = 1.0f;                          // In the implementation to make alpha_ become 1
-  float beta = 0.5f;                           // pow(reduction, -0.5) = 1 / sqrt(reduction)
-  float bias = 0.0f;                           // Don't offset the reduction.
-
-  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
-  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
-
-  const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
-                                                               radius, alpha, beta, bias, false);
-
-  auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>(
-    ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{
-    node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};
-
-  auto radius = node.param().radius;
-  auto alpha = node.param().alpha;
-  auto beta = node.param().beta;
-  auto bias = node.param().bias;
-
-  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
-  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
-
-  const auto norm_info = ::arm_compute::NormalizationLayerInfo(
-    ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
-
-  auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>(
-    ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::LSTM &node)
-{
-  _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ITensor,
-                                         ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_reg);
-}
-
-void KernelGenerator::visit(const ir::operation::Pack &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  auto axis{node.param().axis};
-
-  const auto output_rank = _ctx.at(output_index).shape().rank();
-
-  std::vector<ir::OperandIndex> input_indexes;
-  for (const auto &input_index : node.getInputs())
-    input_indexes.emplace_back(input_index);
-
-  auto output = _tensor_reg->getAclTensor(output_index)->handle();
-  std::vector<arm_compute::ITensor *> inputs;
-  for (const auto &input_index : input_indexes)
-    inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
-
-  if (axis < 0)
-    axis += output_rank;
-  axis = acl_common::ToARMComputeAxis(output_rank, axis).value();
-
-  // Disable applied dim_correction
-  for (const auto &input_index : input_indexes)
-  {
-    const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
-    if (input_tensor->num_dimensions() != input_tensor->info()->num_dimensions())
-    {
-      // This means that high dimension's value is 1 and input tensor is applied dim_correction
-      acl_common::disableDimCorrection(input_tensor);
-    }
-  }
-
-  auto fn = acl_common::generateLayer<arm_compute::NEStackLayer>(inputs, axis, output);
-
-  // Revert disabling applied dim_correction
-  for (const auto &input_index : input_indexes)
-  {
-    const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
-    if (input_tensor->dimension(0) == 1)
-    {
-      acl_common::enableDimCorrection(input_tensor);
-    }
-  }
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Pad &node)
-{
-  const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
-  const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
-  const auto output_index{node.getOutputs().at(0)};
-  assert(_ctx.at(pad_index).data());
-
-  auto rank = _ctx.at(input_index).shape().rank();
-  auto pad_base = _ctx.at(pad_index).data()->base();
-
-  auto input = _tensor_reg->getAclTensor(input_index)->handle();
-  auto output = _tensor_reg->getAclTensor(output_index)->handle();
-
-  ::arm_compute::PaddingList padding_list;
-  padding_list.resize(rank);
-  for (int32_t n = 0; n < rank; ++n)
-  {
-    const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
-
-    const auto axis = acl_common::ToARMComputeAxis(rank, n).value();
-    padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
-  }
-
-  [[maybe_unused]] const auto input_type = _ctx.at(input_index).typeInfo();
-  assert(input->info()->data_type() == acl_common::asDataType(input_type.type()));
-  assert(input->info()->quantization_info() ==
-         ::arm_compute::QuantizationInfo(input_type.scale(), input_type.zero_point()));
-  const auto pixel_value =
-    ::arm_compute::PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
-
-  auto fn =
-    acl_common::generateLayer<arm_compute::NEPadLayer>(input, output, padding_list, pixel_value);
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Pool2D &node)
-{
-  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
-    node, _ctx, _tensor_reg, acl_common::convertPoolType(node.param().op_type));
-
-  const auto ofm_index{node.getOutputs().at(0)};
-  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
-  const auto activation = node.param().activation;
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-    asAclFunction(std::move(raw_fn)),
-    ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::PReLU &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
-  const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
-
-  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
-  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
-  auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index);
-
-  auto fn = acl_common::generateLayer<arm_compute::NEPReluLayer>(
-    ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Reduce &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
-  const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
-
-  auto output_tensor = _tensor_reg->getAclTensor(output_index);
-  auto input_tensor = _tensor_reg->getAclTensor(input_index);
-
-  // Convert to ACL axes taking into account negative values and possible duplicates.
-  const auto &axes = _ctx.at(axes_index);
-  const auto input_rank = _ctx.at(input_index).shape().rank();
-  const auto reduce_axes = acl_common::asCoordinates(axes, input_rank);
-  const auto reduce_type = node.param().reduce_type;
-  const auto keep_dims = node.param().keep_dims;
-
-  std::unique_ptr<::arm_compute::IFunction> fn;
-  if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
-  {
-    fn = acl_common::generateLayer<arm_compute::NEReduceMean>(input_tensor->handle(), reduce_axes,
-                                                              keep_dims, output_tensor->handle());
-  }
-  else if (reduce_type == ir::operation::Reduce::ReduceType::SUM)
-  {
-    fn = acl_common::generateLayer<arm_compute::NEReduceSum>(input_tensor->handle(), reduce_axes,
-                                                             keep_dims, output_tensor->handle());
-  }
-  else
-  {
-    fn = acl_common::generateLayer<arm_compute::NEReduceOperation>(
-      input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
-      acl_common::convertReduceType(reduce_type));
-  }
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Reshape &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
-
-  auto output_tensor = _tensor_reg->getAclTensor(output_index);
-  auto input_tensor = _tensor_reg->getAclTensor(input_index);
-
-  auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
-                                                                   output_tensor->handle());
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
-  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
-
-  auto fn = acl_common::generateLayer<arm_compute::NEScale>(
-    ifm_tensor->handle(), ofm_tensor->handle(),
-    ::arm_compute::ScaleKernelInfo{::arm_compute::InterpolationPolicy::BILINEAR,
-                                   ::arm_compute::BorderMode::REPLICATE,
-                                   ::arm_compute::PixelValue(0.f),
-                                   ::arm_compute::SamplingPolicy::TOP_LEFT, false /*use padding*/});
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::RNN &node)
-{
-  const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
-  const auto hidden_state_out_index{
-    node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
-
-  const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
-  const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
-  const auto recurrent_weights_index{
-    node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
-  const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
-  const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
-
-  const auto activation = node.param().activation;
-
-  auto output_tensor = _tensor_reg->getAclTensor(output_index);
-  auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index);
-
-  auto input_tensor = _tensor_reg->getAclTensor(input_index);
-  auto weights_tensor = _tensor_reg->getAclTensor(weights_index);
-  auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index);
-  auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
-  auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index);
-  auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
-
-  auto copy_layer = acl_common::generateLayer<arm_compute::NECopy>(
-    hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
-  _return_fn = asAclFunction(std::move(copy_layer));
-
-  auto fn = acl_common::generateLayer<arm_compute::NERNNLayer>(
-    _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
-    weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
-    hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Squeeze &node)
-{
-  // Squeeze is identical to reshape except that it has an optional dimensions input.
-  // In addition, optional dims_index is ignored since output tensor already has squeezed shape
-  // by freezer and toco
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
-  const auto dims{node.param().dims};
-  const auto ndim{node.param().ndim};
-  (void)dims;
-  (void)ndim;
-
-  auto output_tensor = _tensor_reg->getAclTensor(output_index);
-  auto input_tensor = _tensor_reg->getAclTensor(input_index);
-  auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
-                                                                   output_tensor->handle());
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Softmax &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
-  const auto beta = node.param().beta;
-
-  auto output_tensor = _tensor_reg->getAclTensor(output_index);
-  auto input_tensor = _tensor_reg->getAclTensor(input_index);
-
-  // NOTE NESoftmaxLayer's default axis is -1
-  auto fn = acl_common::generateLayer<arm_compute::NESoftmaxLayer>(
-    _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
-    output_tensor->handle(), beta);
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
-  const auto block_size_index{
-    node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
-  const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
-
-  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
-  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
-  auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index);
-  auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index);
-
-  assert(_ctx.at(block_size_index).data());
-  assert(_ctx.at(paddings_index).data());
-
-  auto fn = acl_common::generateLayer<arm_compute::NESpaceToBatchLayer>(
-    ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
-    ofm_tensor->handle());
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
-
-  auto block_size = node.param().block_size;
-
-  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
-  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
-
-  auto fn = acl_common::generateLayer<arm_compute::NESpaceToDepthLayer>(
-    ifm_tensor->handle(), ofm_tensor->handle(), block_size);
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Split &node)
-{
-  // TODO Support this op by SubTensor
-  const auto ifm_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
-  const auto axis_index{node.getInputs().at(ir::operation::Split::Input::AXIS)};
-
-  assert(node.param().num_splits == static_cast<int>(node.getOutputs().size()));
-  if (!_ctx.at(axis_index).isConstant())
-  {
-    throw std::runtime_error("Non-constant axis_index NYI for acl_neon backend");
-  }
-
-  const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
-  std::vector<ir::OperandIndex> output_indexes;
-  for (const auto &output : node.getOutputs())
-    output_indexes.emplace_back(output);
-
-  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
-  std::vector<arm_compute::ITensor *> output_tensors;
-  for (const auto &ofm_ind : output_indexes)
-    output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle());
-
-  auto axis = _ctx.at(axis_index).asScalar<int32_t>();
-  if (axis < 0)
-    axis += ifm_rank;
-  axis = acl_common::ToARMComputeAxis(ifm_rank, axis).value();
-
-  auto fn =
-    acl_common::generateLayer<arm_compute::NESplit>(ifm_tensor->handle(), output_tensors, axis);
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
-
-  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
-  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
-  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
-
-  auto fn = acl_common::generateLayer<arm_compute::NEElementwiseSquaredDiff>(
-    lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Slice &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
-  const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
-  const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
-
-  auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
-  auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
-
-  // Set initializers for indices data such as order of inputData
-  int input_rank = _ctx.at(input_index).shape().rank();
-  std::vector<int32_t> starts;
-  std::vector<int32_t> ends;
-  starts.resize(input_rank, 0);
-  ends.resize(input_rank, 0);
-  {
-    auto beginData_base = _ctx.at(begins_index).data()->base();
-    auto sizeData_base = _ctx.at(sizes_index).data()->base();
-    [[maybe_unused]] const int beginData_size = _ctx.at(begins_index).shape().num_elements();
-    [[maybe_unused]] const int sizeData_size = _ctx.at(sizes_index).shape().num_elements();
-
-    using ir::DataType;
-
-    assert(_ctx.at(begins_index).typeInfo().type() == DataType::INT32);
-    assert(_ctx.at(sizes_index).typeInfo().type() == DataType::INT32);
-    assert(beginData_size == input_rank);
-    assert(sizeData_size == input_rank);
-
-    assert(beginData_base != nullptr);
-    for (int n = 0; n < input_rank; ++n)
-    {
-      auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n).value();
-
-      int32_t begin_value = *(reinterpret_cast<const int32_t *>(beginData_base) + n);
-      starts[axis] = begin_value;
-
-      int32_t size_value = *(reinterpret_cast<const int32_t *>(sizeData_base) + n);
-      ends[axis] = begin_value + size_value;
-    }
-  }
-
-  ::arm_compute::Coordinates starts_set;
-  ::arm_compute::Coordinates ends_set;
-
-  for (size_t i = 0; i < starts.size(); ++i)
-  {
-    starts_set.set(i, starts[i]);
-    ends_set.set(i, ends[i]);
-  }
-
-  auto fn = acl_common::generateLayer<arm_compute::NESlice>(
-    inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::StridedSlice &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
-  const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
-  const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
-  const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
-
-  auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
-  auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
-
-  // Set initializers for indices data such as order of inputData
-  int input_rank = _ctx.at(input_index).shape().rank();
-  std::vector<int32_t> starts;
-  std::vector<int32_t> ends;
-  std::vector<int32_t> strides;
-  starts.resize(input_rank, 0);
-  ends.resize(input_rank, 0);
-  strides.resize(input_rank, 0);
-  {
-    auto startData_base = _ctx.at(starts_index).data()->base();
-    auto endData_base = _ctx.at(ends_index).data()->base();
-    auto stridesData_base = _ctx.at(strides_index).data()->base();
-    [[maybe_unused]] const int startData_size = _ctx.at(starts_index).shape().num_elements();
-    [[maybe_unused]] const int endData_size = _ctx.at(ends_index).shape().num_elements();
-    [[maybe_unused]] const int stridesData_size = _ctx.at(strides_index).shape().num_elements();
-
-    using ir::DataType;
-
-    assert(_ctx.at(starts_index).typeInfo().type() == DataType::INT32);
-    assert(_ctx.at(ends_index).typeInfo().type() == DataType::INT32);
-    assert(_ctx.at(strides_index).typeInfo().type() == DataType::INT32);
-    assert(startData_size == input_rank);
-    assert(endData_size == input_rank);
-    assert(stridesData_size == input_rank);
-
-    assert(startData_base != nullptr);
-    for (int n = 0; n < input_rank; ++n)
-    {
-      auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n).value();
-
-      int32_t start_value = *(reinterpret_cast<const int32_t *>(startData_base) + n);
-      starts[axis] = start_value;
-
-      int32_t end_value = *(reinterpret_cast<const int32_t *>(endData_base) + n);
-      ends[axis] = end_value;
-
-      int32_t strides_value = *(reinterpret_cast<const int32_t *>(stridesData_base) + n);
-      strides[axis] = strides_value;
-    }
-  }
-
-  // Set mask bits such as order of inputData
-  const auto begin_mask = acl_common::ReorderBits<int32_t>(node.param().begin_mask, input_rank);
-  const auto end_mask = acl_common::ReorderBits<int32_t>(node.param().end_mask, input_rank);
-  const auto shrink_axis_mask =
-    acl_common::ReorderBits<int32_t>(node.param().shrink_axis_mask, input_rank);
-
-  ::arm_compute::Coordinates starts_set;
-  ::arm_compute::Coordinates ends_set;
-  ::arm_compute::BiStrides strides_set;
-
-  for (size_t i = 0; i < starts.size(); ++i)
-  {
-    starts_set.set(i, starts[i]);
-    ends_set.set(i, ends[i]);
-    strides_set.set(i, strides[i]);
-  }
-
-  // Disable applied dim_correction
-  if (static_cast<size_t>(inputData_tensor->getShape().rank()) !=
-      inputData_tensor->info()->num_dimensions())
-  {
-    // This means that high dimension's value is 1 and input tensor is applied dim_correction
-    acl_common::disableDimCorrection(inputData_tensor);
-  }
-
-  auto fn = acl_common::generateLayer<arm_compute::NEStridedSlice>(
-    inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
-    begin_mask, end_mask, shrink_axis_mask);
-
-  // Revert disabling applied dim_correction
-  if (inputData_tensor->getShape().dim(0) == 1)
-  {
-    acl_common::enableDimCorrection(inputData_tensor);
-  }
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::TransposeConv &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
-  const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
-
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
-  const auto ker_shape = _ctx.at(ker_index).shape().asFeature();
-
-  const auto stride = node.param().stride;
-
-  assert((node.param().padding.type == ir::PaddingType::SAME) ||
-         (node.param().padding.type == ir::PaddingType::VALID));
-  auto padding = ir::calculatePadding(node.param().padding, ofm_shape, ifm_shape, stride,
-                                      ker_shape.W, ker_shape.H);
-
-  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
-  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
-  auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
-
-  const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
-
-  auto fn = acl_common::generateLayer<arm_compute::NEDeconvolutionLayer>(
-    ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info);
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Transpose &node)
-{
-  const auto ofm_idx{node.getOutputs().at(0)};
-  const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
-  const auto perm_idx{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
-
-  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
-  const auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);
-  const auto rank = _ctx.at(ifm_idx).shape().rank();
-
-  const auto &perms = _ctx.at(perm_idx);
-  std::vector<int32_t> pv;
-  if (perms.shape() == ir::Shape{0})
-  {
-    pv.resize(rank);
-    std::iota(pv.begin(), pv.end(), 0);
-    std::reverse(pv.begin(), pv.end());
-  }
-  else
-  {
-    pv = _ctx.at(perm_idx).asVector<int32_t>();
-  }
-
-  std::unique_ptr<arm_compute::IFunction> fn;
-  if (rank == 1)
-  {
-    fn = acl_common::generateLayer<arm_compute::NECopy>(ifm_tensor->handle(), ofm_tensor->handle());
-  }
-  else if (rank == 2)
-  {
-    assert(pv.size() == 2 && pv.at(0) == 1 && pv.at(1) == 0);
-    fn = acl_common::generateLayer<arm_compute::NETranspose>(ifm_tensor->handle(),
-                                                             ofm_tensor->handle());
-  }
-  else
-  {
-    auto backend_pv = acl_common::getARMComputePermutationVector(rank, pv);
-
-    fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
-                                                           ofm_tensor->handle(), backend_pv);
-  }
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Unpack &node)
-{
-  const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
-  auto axis{node.param().axis};
-
-  const auto input_rank = _ctx.at(input_index).shape().rank();
-
-  std::vector<ir::OperandIndex> output_indexes;
-  for (const auto &output_index : node.getOutputs())
-    output_indexes.emplace_back(output_index);
-
-  auto input_tensor = _tensor_reg->getAclTensor(input_index);
-  std::vector<arm_compute::ITensor *> outputs;
-  for (const auto &output_index : output_indexes)
-    outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
-
-  if (axis < 0)
-    axis += input_rank;
-  axis = acl_common::ToARMComputeAxis(input_rank, axis).value();
-
-  // Disable applied dim_correction
-  if (static_cast<size_t>(input_tensor->getShape().rank()) !=
-      input_tensor->info()->num_dimensions())
-  {
-    // This means that high dimension's value is 1 and input tensor is applied dim_correction
-    acl_common::disableDimCorrection(input_tensor);
-  }
-
-  auto fn =
-    acl_common::generateLayer<arm_compute::NEUnstack>(input_tensor->handle(), outputs, axis);
-
-  // Revert disabling applied dim_correction
-  if (input_tensor->getShape().dim(0) == 1)
-  {
-    acl_common::enableDimCorrection(input_tensor);
-  }
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ExpandDims &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
-
-  auto output_tensor = _tensor_reg->getAclTensor(output_index);
-  auto input_tensor = _tensor_reg->getAclTensor(input_index);
-
-  auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
-                                                                   output_tensor->handle());
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Comparison &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input0_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
-  const auto input1_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
-
-  const auto comparison_type = node.param().comparison_type;
-
-  auto output_tensor = _tensor_reg->getAclTensor(output_index);
-  auto input0_tensor = _tensor_reg->getAclTensor(input0_index);
-  auto input1_tensor = _tensor_reg->getAclTensor(input1_index);
-
-  auto fn = acl_common::generateLayer<arm_compute::NEElementwiseComparison>(
-    input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
-    (arm_compute::ComparisonOperation)comparison_type);
-
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::OneHot &node)
-{
-  const auto out_idx{node.getOutputs().at(0)};
-  const auto indices_idx{node.getInputs().at(ir::operation::OneHot::Input::INDICES)};
-  const auto depth_idx{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
-  const auto onvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
-  const auto offvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
-
-  auto output_tensor = _tensor_reg->getAclTensor(out_idx);
-  auto indices_tensor = _tensor_reg->getAclTensor(indices_idx);
-  auto depth_tensor = _tensor_reg->getAclTensor(depth_idx);
-  auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx);
-  auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx);
-
-  const size_t output_rank = _ctx.at(out_idx).shape().rank();
-  int32_t axis = node.param().axis == -1 ? output_rank - 1 : node.param().axis;
-  axis = acl_common::ToARMComputeAxis(output_rank, axis).value();
-
-  auto fn = acl_common::generateLayer<arm_compute::NEOneHot>(
-    indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
-    offvalue_tensor->handle(), output_tensor->handle(), axis);
-  _return_fn = asAclFunction(std::move(fn));
-}
-
 } // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/Validator.h b/runtime/onert/backend/acl_neon/Validator.h
index 4c98abfebf8..71441943afe 100644
--- a/runtime/onert/backend/acl_neon/Validator.h
+++ b/runtime/onert/backend/acl_neon/Validator.h
@@ -30,8 +30,7 @@ class Validator : public backend::ValidatorBase
   Validator(const ir::Graph &graph) : backend::ValidatorBase(graph) {}
 
 private:
-#define OP(InternalName) \
-  void visit(const ir::operation::InternalName &) override { _supported = true; }
+#define OP(InternalName) void visit(const ir::operation::InternalName &) override;
 #include "Operation.lst"
 #undef OP
 };
diff --git a/runtime/onert/backend/acl_neon/ops/ArgMinMax.cc b/runtime/onert/backend/acl_neon/ops/ArgMinMax.cc
new file mode 100644
index 00000000000..4f4dd1b0835
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/ArgMinMax.cc
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+void Validator::visit(const ir::operation::ArgMinMax &) { _supported = true; }
+
+void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
+{
+  // Creates an NEArgMinMaxLayer for this node and stores the wrapped function in _return_fn.
+  const auto out_idx{node.getOutputs().at(0)};
+  const auto &inputs = node.getInputs();
+  const auto in_idx{inputs.at(ir::operation::ArgMinMax::Input::INPUT)};
+  const auto axis_idx{inputs.at(ir::operation::ArgMinMax::Input::AXIS)};
+
+  const auto rank = _ctx.at(in_idx).shape().rank();
+
+  auto out_tensor = _tensor_reg->getAclTensor(out_idx);
+  auto in_tensor = _tensor_reg->getAclTensor(in_idx);
+
+  // A negative axis is shifted by the input rank before it is converted to an ACL axis.
+  const int raw_axis = _ctx.at(axis_idx).asScalar<int32_t>();
+  const int axis = (raw_axis < 0) ? raw_axis + rank : raw_axis;
+  assert(axis >= 0 && axis < rank);
+  const auto acl_axis = acl_common::ToARMComputeAxis(rank, axis).value();
+
+  using ::arm_compute::ReductionOperation;
+  const auto op = node.param().is_arg_max ? ReductionOperation::ARG_IDX_MAX
+                                          : ReductionOperation::ARG_IDX_MIN;
+
+  _return_fn = acl_common::asAclFunction(acl_common::generateLayer<arm_compute::NEArgMinMaxLayer>(
+    in_tensor->handle(), acl_axis, out_tensor->handle(), op));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/BatchToSpaceND.cc b/runtime/onert/backend/acl_neon/ops/BatchToSpaceND.cc
new file mode 100644
index 00000000000..f9874083c13
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/BatchToSpaceND.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+void Validator::visit(const ir::operation::BatchToSpaceND &) { _supported = true; }
+
+void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
+{
+  // Generates an NEBatchToSpaceLayer kernel; crops, when given, must be constant zeros.
+  using Input = ir::operation::BatchToSpaceND::Input;
+  const auto out_idx{node.getOutputs().at(0)};
+  const auto in_idx{node.getInputs().at(Input::INPUT)};
+  const auto block_idx{node.getInputs().at(Input::BLOCK_SIZE)};
+
+  // With exactly two inputs the crops operand is not consulted at all.
+  const auto inputs_without_crops = 2;
+  if (node.getInputs().size() != inputs_without_crops)
+  {
+    const auto &crops_operand = _ctx.at(node.getInputs().at(Input::CROPS_DATA));
+    if (!crops_operand.isConstant())
+    {
+      throw std::runtime_error("Non-constant crops NYI for acl_neon backend BatchToSpaceND");
+    }
+
+    for (const auto crop_value : crops_operand.asVector<int32_t>())
+    {
+      if (crop_value != 0)
+      {
+        throw std::runtime_error("Non-zero crops NYI for acl_neon backend BatchToSpaceND");
+      }
+    }
+  }
+
+  auto out_tensor = _tensor_reg->getAclTensor(out_idx);
+  auto in_tensor = _tensor_reg->getAclTensor(in_idx);
+
+  if (!_ctx.at(block_idx).data())
+    throw std::runtime_error("ACL NEON does not support dynamic block size for BatchToSpaceND");
+
+  // Block data is read as {height, width}; the layer takes width before height.
+  const auto block_dims = _ctx.at(block_idx).asVector<int32_t>();
+  const int32_t block_h = block_dims[0];
+  const int32_t block_w = block_dims[1];
+  auto layer = acl_common::generateLayer<arm_compute::NEBatchToSpaceLayer>(
+    in_tensor->handle(), block_w, block_h, out_tensor->handle());
+  _return_fn = acl_common::asAclFunction(std::move(layer));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/BinaryArithmetic.cc b/runtime/onert/backend/acl_neon/ops/BinaryArithmetic.cc
new file mode 100644
index 00000000000..7f7fac2137f
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/BinaryArithmetic.cc
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclActivationBuilder.h>
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+using ActivationBuilder =
+  acl_common::AclActivationBuilder<::arm_compute::ITensor, ::arm_compute::NEActivationLayer,
+                                   acl_common::AclFunction>;
+
+// Every BinaryArithmetic node is reported as supported by this backend.
+void Validator::visit(const ir::operation::BinaryArithmetic &) { _supported = true; }
+
+// Builds the NEON function for ADD, SUB, MUL or DIV and chains the node's activation after it;
+// the resulting FunctionSequence is stored in _return_fn. Every function's validate() result is
+// checked with throw_if_error() before the layer is generated.
+// Throws std::runtime_error for any other arithmetic type.
+void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
+{
+  using ArithmeticType = ir::operation::BinaryArithmetic::ArithmeticType;
+
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
+
+  const auto activation = node.param().activation;
+
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
+  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
+
+  std::unique_ptr<arm_compute::IFunction> fn;
+  switch (node.param().arithmetic_type)
+  {
+    case ArithmeticType::ADD:
+      arm_compute::NEArithmeticAddition::validate(lhs_tensor->info(), rhs_tensor->info(),
+                                                  ofm_tensor->info(),
+                                                  arm_compute::ConvertPolicy::SATURATE)
+        .throw_if_error();
+      fn = acl_common::generateLayer<arm_compute::NEArithmeticAddition>(
+        lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+        arm_compute::ConvertPolicy::SATURATE);
+      break;
+    case ArithmeticType::SUB:
+      arm_compute::NEArithmeticSubtraction::validate(lhs_tensor->info(), rhs_tensor->info(),
+                                                     ofm_tensor->info(),
+                                                     arm_compute::ConvertPolicy::SATURATE)
+        .throw_if_error();
+      fn = acl_common::generateLayer<arm_compute::NEArithmeticSubtraction>(
+        lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+        arm_compute::ConvertPolicy::SATURATE);
+      break;
+    case ArithmeticType::MUL:
+      arm_compute::NEPixelWiseMultiplication::validate(
+        lhs_tensor->info(), rhs_tensor->info(), ofm_tensor->info(), 1.0,
+        arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO)
+        .throw_if_error();
+      // RoundingPolicy for scale:1.0 is only allowed RoundingPolicy::TO_ZERO
+      fn = acl_common::generateLayer<arm_compute::NEPixelWiseMultiplication>(
+        lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
+        arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
+      break;
+    case ArithmeticType::DIV:
+      arm_compute::NEElementwiseDivision::validate(lhs_tensor->info(), rhs_tensor->info(),
+                                                   ofm_tensor->info())
+        .throw_if_error();
+      fn = acl_common::generateLayer<arm_compute::NEElementwiseDivision>(
+        lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+      break;
+    default:
+      // assert() is compiled out under NDEBUG, which would let a null fn reach asAclFunction().
+      throw std::runtime_error(
+        "The BinaryArithmetic operation supports only binary arithmetic operations");
+  }
+  _return_fn = std::make_unique<exec::FunctionSequence>(
+    acl_common::asAclFunction(std::move(fn)),
+    ActivationBuilder::generate(activation, ofm_tensor->handle()));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/Comparison.cc b/runtime/onert/backend/acl_neon/ops/Comparison.cc
new file mode 100644
index 00000000000..74f7c036401
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/Comparison.cc
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Every Comparison node is reported as supported by this backend.
+void Validator::visit(const ir::operation::Comparison &) { _supported = true; }
+
+// Builds an NEElementwiseComparison over the node's two inputs and stores it in _return_fn.
+// NOTE(review): assumes ir ComparisonType values match arm_compute::ComparisonOperation.
+void KernelGenerator::visit(const ir::operation::Comparison &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input0_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
+  const auto input1_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
+  const auto acl_op = static_cast<arm_compute::ComparisonOperation>(node.param().comparison_type);
+
+  auto output_tensor = _tensor_reg->getAclTensor(output_index);
+  auto input0_tensor = _tensor_reg->getAclTensor(input0_index);
+  auto input1_tensor = _tensor_reg->getAclTensor(input1_index);
+
+  auto fn = acl_common::generateLayer<arm_compute::NEElementwiseComparison>(
+    input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(), acl_op);
+  _return_fn = acl_common::asAclFunction(std::move(fn));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/Concat.cc b/runtime/onert/backend/acl_neon/ops/Concat.cc
new file mode 100644
index 00000000000..d2326232e64
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/Concat.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Every Concat node is reported as supported by this backend.
+void Validator::visit(const ir::operation::Concat &) { _supported = true; }
+
+// Generates the kernel for Concat and stores it in _return_fn: a NopFunction when the inputs are
+// already sub-tensors of the output, an NECopy for fewer than two inputs, and an
+// NEConcatenateLayer along the ACL-converted axis otherwise.
+void KernelGenerator::visit(const ir::operation::Concat &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+
+  // Nothing to compute when the tensor builder reports the inputs as sub-tensors of the output.
+  if (_tensor_builder->areSubTensorsOf(ofm_index, node.getInputs()))
+  {
+    VERBOSE(acl_neon_KernelGenerator_Concat) << "Concat eliminated" << std::endl;
+    _return_fn = std::make_unique<exec::NopFunction>();
+    return;
+  }
+
+  std::vector<ir::OperandIndex> ifm_indexes;
+  for (const auto &operand : node.getInputs())
+    ifm_indexes.emplace_back(operand);
+
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+  std::vector<const ::arm_compute::ITensor *> ifm_handles;
+  for (const auto &index : ifm_indexes)
+    ifm_handles.emplace_back(_tensor_reg->getAclTensor(index)->handle());
+
+  std::unique_ptr<::arm_compute::IFunction> fn;
+  if (ifm_indexes.size() >= 2)
+  {
+    const auto rank = _ctx.at(ofm_index).shape().rank();
+    const auto acl_axis = acl_common::ToARMComputeAxis(rank, node.param().axis).value();
+    fn = acl_common::generateLayer<arm_compute::NEConcatenateLayer>(
+      ifm_handles, ofm_tensor->handle(), acl_axis);
+  }
+  else
+  {
+    ::arm_compute::ITensor *single_ifm = _tensor_reg->getAclTensor(ifm_indexes.at(0))->handle();
+    fn = acl_common::generateLayer<arm_compute::NECopy>(single_ifm, ofm_tensor->handle());
+  }
+
+  _return_fn = acl_common::asAclFunction(std::move(fn));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/Conv2D.cc b/runtime/onert/backend/acl_neon/ops/Conv2D.cc
new file mode 100644
index 00000000000..c9b9c68ee82
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/Conv2D.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Every Conv2D node is reported as supported by this backend.
+void Validator::visit(const ir::operation::Conv2D &) { _supported = true; }
+
+// Builds an NEConvolutionLayer with the node's padding, stride and activation into _return_fn.
+void KernelGenerator::visit(const ir::operation::Conv2D &node)
+{
+  using ir::operation::Conv2D;
+  const auto &param = node.param();
+
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
+  const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
+  const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
+
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
+  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+  const auto &ker_shape = _ctx.at(ker_index).shape();
+  const auto ker_height = ker_shape.dim(1);
+  const auto ker_width = ker_shape.dim(2);
+  const auto padding = ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride,
+                                            ker_width, ker_height);
+
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+  auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
+  auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
+
+  const auto conv_info = acl_common::asPadStrideInfo(padding, param.stride);
+  const auto act_info = acl_common::asActivationLayerInfo(param.activation);
+
+  auto fn = acl_common::generateLayer<arm_compute::NEConvolutionLayer>(
+    _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+    ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
+    ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
+
+  _return_fn = acl_common::asAclFunction(std::move(fn));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/DepthToSpace.cc b/runtime/onert/backend/acl_neon/ops/DepthToSpace.cc
new file mode 100644
index 00000000000..b316aec5374
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/DepthToSpace.cc
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Every DepthToSpace node is reported as supported by this backend.
+void Validator::visit(const ir::operation::DepthToSpace &) { _supported = true; }
+
+// Builds an NEDepthToSpaceLayer with the node's block size and stores it in _return_fn.
+void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
+  const auto block_size = node.param().block_size;
+  assert(block_size > 0);
+
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+
+  auto fn = acl_common::generateLayer<arm_compute::NEDepthToSpaceLayer>(
+    ifm_tensor->handle(), ofm_tensor->handle(), block_size);
+  _return_fn = acl_common::asAclFunction(std::move(fn));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/DepthwiseConv2D.cc b/runtime/onert/backend/acl_neon/ops/DepthwiseConv2D.cc
new file mode 100644
index 00000000000..0b619037c0a
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/DepthwiseConv2D.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Every DepthwiseConv2D node is reported as supported by this backend.
+void Validator::visit(const ir::operation::DepthwiseConv2D &) { _supported = true; }
+
+// Builds an NEDepthwiseConvolutionLayer from the node's stride, dilation, padding, multiplier
+// and activation, and stores it in _return_fn.
+void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
+{
+  using ir::operation::DepthwiseConv2D;
+  const auto &param = node.param();
+  const auto &dilation = param.dilation;
+
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
+  const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
+  const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
+
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
+  // Kernel format is [1, kernel_height, kernel_width, depth_out].
+  const auto &ker_shape = _ctx.at(ker_index).shape();
+  const auto ker_height = ker_shape.dim(1);
+  const auto ker_width = ker_shape.dim(2);
+  const auto padding =
+    ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, ker_width, ker_height,
+                         dilation.width_factor, dilation.height_factor);
+
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+  auto ker_tensor = _tensor_reg->getAclTensor(ker_index);
+  auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
+
+  const auto conv_info = acl_common::asPadStrideInfo(padding, param.stride);
+  const auto act_info = acl_common::asActivationLayerInfo(param.activation);
+  const auto dilation_info = acl_common::asDilation(dilation.width_factor, dilation.height_factor);
+
+  auto fn = acl_common::generateLayer<arm_compute::NEDepthwiseConvolutionLayer>(
+    ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
+    conv_info, param.multiplier, act_info, dilation_info);
+
+  _return_fn = acl_common::asAclFunction(std::move(fn));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/ElementwiseActivation.cc b/runtime/onert/backend/acl_neon/ops/ElementwiseActivation.cc
new file mode 100644
index 00000000000..3e49b2e177e
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/ElementwiseActivation.cc
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Every ElementwiseActivation node is reported as supported by this backend.
+void Validator::visit(const ir::operation::ElementwiseActivation &) { _supported = true; }
+
+// Builds an NEActivationLayer from the node's op_type/alpha/beta and stores it in _return_fn.
+void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
+{
+  const auto &param = node.param();
+  const auto out_index{node.getOutputs().at(0)};
+  const auto in_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
+
+  auto out_tensor = _tensor_reg->getAclTensor(out_index);
+  auto in_tensor = _tensor_reg->getAclTensor(in_index);
+
+  const auto act_info = acl_common::asActivationLayerInfo(param.op_type, param.alpha, param.beta);
+  auto fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
+    in_tensor->handle(), out_tensor->handle(), act_info);
+
+  _return_fn = acl_common::asAclFunction(std::move(fn));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/ElementwiseBinary.cc b/runtime/onert/backend/acl_neon/ops/ElementwiseBinary.cc
new file mode 100644
index 00000000000..a849ce252d2
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/ElementwiseBinary.cc
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Every ElementwiseBinary node is reported as supported by this backend.
+void Validator::visit(const ir::operation::ElementwiseBinary &) { _supported = true; }
+
+// Builds the NEON function for LOGICAL_AND, LOGICAL_OR, MAX or MIN and stores it in _return_fn.
+// Throws std::runtime_error for any other op_type so that a null function is never wrapped,
+// even in builds where assert() is compiled out.
+void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
+
+  auto output_tensor = _tensor_reg->getAclTensor(output_index);
+  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index);
+  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index);
+
+  std::unique_ptr<arm_compute::IFunction> fn;
+  switch (node.param().op_type)
+  {
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
+    {
+      fn = acl_common::generateLayer<arm_compute::NELogicalAnd>(
+        lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
+    {
+      fn = acl_common::generateLayer<arm_compute::NELogicalOr>(
+        lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEElementwiseMax>(
+        lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEElementwiseMin>(
+        lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    default:
+      throw std::runtime_error("acl_neon KernelGenerator : " + node.name() +
+                               " is not elementwise-binary operations");
+  }
+  _return_fn = acl_common::asAclFunction(std::move(fn));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/ElementwiseUnary.cc b/runtime/onert/backend/acl_neon/ops/ElementwiseUnary.cc
new file mode 100644
index 00000000000..466b22b8338
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/ElementwiseUnary.cc
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Every ElementwiseUnary node is reported as supported; unhandled op types throw in the generator.
+void Validator::visit(const ir::operation::ElementwiseUnary &) { _supported = true; }
+
+// Builds the NEON function for the node's unary op_type and stores it in _return_fn.
+// Throws std::runtime_error for op types without a case below.
+void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
+
+  auto output_tensor = _tensor_reg->getAclTensor(output_index);
+  auto input_tensor = _tensor_reg->getAclTensor(input_index);
+
+  std::unique_ptr<arm_compute::IFunction> fn;
+  switch (node.param().op_type)
+  {
+    case ir::operation::ElementwiseUnary::Type::ABS:
+    {
+      const ::arm_compute::ActivationLayerInfo act_info{
+        ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
+
+      fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
+        input_tensor->handle(), output_tensor->handle(), act_info);
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::CAST:
+    {
+      if (input_tensor->data_type() == output_tensor->data_type())
+      {
+        fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensor->handle(),
+                                                            output_tensor->handle());
+      }
+      else if (_ctx.at(input_index).typeInfo().type() == ir::DataType::BOOL8)
+      {
+        fn = acl_common::generateLayer<arm_compute::NECastBool>(input_tensor->handle(),
+                                                                output_tensor->handle());
+      }
+      else
+      {
+        fn = acl_common::generateLayer<arm_compute::NECast>(
+          input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+      }
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEDequantizationLayer>(input_tensor->handle(),
+                                                                         output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::EXP:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEExpLayer>(input_tensor->handle(),
+                                                              output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::FLOOR:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEFloor>(input_tensor->handle(),
+                                                           output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEBitwiseNot>(input_tensor->handle(),
+                                                                output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::NEG:
+    {
+      fn = acl_common::generateLayer<arm_compute::NENegLayer>(input_tensor->handle(),
+                                                              output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::RSQRT:
+    {
+      fn = acl_common::generateLayer<arm_compute::NERsqrtLayer>(input_tensor->handle(),
+                                                                output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::SQRT:
+    {
+      const ::arm_compute::ActivationLayerInfo act_info{
+        ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
+
+      fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
+        input_tensor->handle(), output_tensor->handle(), act_info);
+      break;
+    }
+    default:
+      throw std::runtime_error("acl_neon KernelGenerator : " + node.name() +
+                               " is not supported yet");
+  }
+  _return_fn = acl_common::asAclFunction(std::move(fn));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/EmbeddingLookup.cc b/runtime/onert/backend/acl_neon/ops/EmbeddingLookup.cc
new file mode 100644
index 00000000000..234195b2834
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/EmbeddingLookup.cc
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+void Validator::visit(const ir::operation::EmbeddingLookup &) { _supported = true; }
+// Builds an NEGather of `values` indexed by `lookups` and stores it in _return_fn.
+void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
+  const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
+
+  auto output_tensor = _tensor_reg->getAclTensor(output_index);
+  auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index);
+  auto values_tensor = _tensor_reg->getAclTensor(values_index);
+
+  size_t n = _ctx.at(values_index).shape().rank();
+  assert(n == values_tensor->num_dimensions());
+  size_t k = _ctx.at(lookups_index).shape().rank();
+  assert(k == lookups_tensor->num_dimensions());
+  // NOTE(review): asserts read num_dimensions(); the checks below read info()->num_dimensions().
+  const int axis = ::onert::backend::acl_common::ToARMComputeAxis(n, 0).value();
+
+  // Disable dim_correction where the IR rank differs from the ACL tensor info's rank
+  if (n != values_tensor->info()->num_dimensions())
+  {
+    // i.e. a high dimension of size 1 exists and dim_correction was applied to the values tensor
+    acl_common::disableDimCorrection(values_tensor);
+  }
+  if (k != lookups_tensor->info()->num_dimensions())
+  {
+    // i.e. a high dimension of size 1 exists and dim_correction was applied to the lookups tensor
+    acl_common::disableDimCorrection(lookups_tensor);
+  }
+
+  auto fn = acl_common::generateLayer<arm_compute::NEGather>(
+    values_tensor->handle(), lookups_tensor->handle(), output_tensor->handle(), axis);
+
+  // Re-enable dim_correction. NOTE(review): keyed on dimension(0) == 1, unlike the checks above.
+  if (values_tensor->dimension(0) == 1)
+  {
+    acl_common::enableDimCorrection(values_tensor);
+  }
+  if (lookups_tensor->dimension(0) == 1)
+  {
+    acl_common::enableDimCorrection(lookups_tensor);
+  }
+
+  _return_fn = acl_common::asAclFunction(std::move(fn));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/ExpandDims.cc b/runtime/onert/backend/acl_neon/ops/ExpandDims.cc
new file mode 100644
index 00000000000..0fd3b0ee90d
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/ExpandDims.cc
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Every ExpandDims node is reported as supported by this backend.
+void Validator::visit(const ir::operation::ExpandDims &) { _supported = true; }
+
+// Generates ExpandDims as an NEReshapeLayer from input to output; stores it in _return_fn.
+void KernelGenerator::visit(const ir::operation::ExpandDims &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
+
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
+
+  _return_fn = acl_common::asAclFunction(acl_common::generateLayer<arm_compute::NEReshapeLayer>(
+    ifm_tensor->handle(), ofm_tensor->handle()));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/FullyConnected.cc b/runtime/onert/backend/acl_neon/ops/FullyConnected.cc
new file mode 100644
index 00000000000..574970eddc4
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/FullyConnected.cc
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclActivationBuilder.h>
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+using ActivationBuilder =
+  acl_common::AclActivationBuilder<::arm_compute::ITensor, ::arm_compute::NEActivationLayer,
+                                   acl_common::AclFunction>;
+
+// FullyConnected is always marked as supported by the validator.
+void Validator::visit(const ir::operation::FullyConnected &) { _supported = true; }
+
+// Emits kernelGenFullyConnected plus the activation; throws for Shuffled16x1Float32 weights.
+void KernelGenerator::visit(const ir::operation::FullyConnected &node)
+{
+  auto out_tensor = _tensor_reg->getAclTensor(node.getOutputs().at(0));
+  if (node.param().weights_format == ir::FullyConnectedWeightsFormat::Shuffled16x1Float32)
+    throw std::runtime_error(
+      "KernelGenerator(acl_neon): FullyConnected 16x1Float32 weights is not supported.");
+
+  auto fc = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor,
+                                                ::arm_compute::NEFullyConnectedReshapingLayer>(
+    node, _ctx, _tensor_builder, _tensor_reg);
+  _return_fn = std::make_unique<exec::FunctionSequence>(
+    std::move(fc), ActivationBuilder::generate(node.param().activation, out_tensor->handle()));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/Gather.cc b/runtime/onert/backend/acl_neon/ops/Gather.cc
new file mode 100644
index 00000000000..ffa65687cb6
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/Gather.cc
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Gather is always marked as supported by the validator.
+void Validator::visit(const ir::operation::Gather &) { _supported = true; }
+
+// Emits an NEGather layer. dim_correction is disabled on the input/indices tensors it was
+// applied to while the layer is generated, then re-enabled where dimension(0) is 1.
+void KernelGenerator::visit(const ir::operation::Gather &node)
+{
+  const auto out_idx{node.getOutputs().at(0)};
+  const auto in_idx{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
+  const auto indices_idx{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
+
+  // Wrap a negative axis by the input rank, then convert it in reverse order
+  const auto in_rank = _ctx.at(in_idx).shape().rank();
+  const auto ir_axis = node.param().axis;
+  const auto wrapped_axis = (ir_axis < 0) ? (in_rank + ir_axis) : ir_axis;
+  const int acl_axis = acl_common::ToARMComputeAxis(in_rank, wrapped_axis).value();
+
+  auto out_tensor = _tensor_reg->getAclTensor(out_idx);
+  auto in_tensor = _tensor_reg->getAclTensor(in_idx);
+  auto indices_tensor = _tensor_reg->getAclTensor(indices_idx);
+
+  // input is n-D, indices k-D, output is (n + k - 1)-D
+  const size_t n = in_rank;
+  assert(n == in_tensor->num_dimensions());
+  const size_t k = _ctx.at(indices_idx).shape().rank();
+  assert(k == indices_tensor->num_dimensions());
+
+  // A rank mismatch means a high dimension of 1 was dropped by dim_correction: disable it
+  if (n != in_tensor->info()->num_dimensions())
+  {
+    acl_common::disableDimCorrection(in_tensor);
+  }
+  if (k != indices_tensor->info()->num_dimensions())
+  {
+    acl_common::disableDimCorrection(indices_tensor);
+  }
+
+  auto gather = acl_common::generateLayer<arm_compute::NEGather>(
+    in_tensor->handle(), indices_tensor->handle(), out_tensor->handle(), acl_axis);
+
+  // Revert disabling applied dim_correction
+  if (in_tensor->dimension(0) == 1)
+  {
+    acl_common::enableDimCorrection(in_tensor);
+  }
+  if (indices_tensor->dimension(0) == 1)
+  {
+    acl_common::enableDimCorrection(indices_tensor);
+  }
+
+  _return_fn = acl_common::asAclFunction(std::move(gather));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/HashtableLookup.cc b/runtime/onert/backend/acl_neon/ops/HashtableLookup.cc
new file mode 100644
index 00000000000..dbb2a015a95
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/HashtableLookup.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// HashtableLookup is always marked as supported by the validator.
+void Validator::visit(const ir::operation::HashtableLookup &) { _supported = true; }
+
+// Emits an NEHashtableLookup layer built from the LOOKUPS/KEYS/VALUES inputs and the
+// OUTPUT/HITS outputs of the node.
+void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
+{
+  using Op = ir::operation::HashtableLookup;
+  const auto &outs = node.getOutputs();
+  const auto &ins = node.getInputs();
+
+  auto output = _tensor_reg->getAclTensor(outs.at(Op::Output::OUTPUT))->handle();
+  auto hits = _tensor_reg->getAclTensor(outs.at(Op::Output::HITS))->handle();
+
+  auto lookups = _tensor_reg->getAclTensor(ins.at(Op::Input::LOOKUPS))->handle();
+  auto keys = _tensor_reg->getAclTensor(ins.at(Op::Input::KEYS))->handle();
+  auto values = _tensor_reg->getAclTensor(ins.at(Op::Input::VALUES))->handle();
+
+  // Layer argument order: lookups, keys, values, output, hits
+  auto lookup_fn = acl_common::generateLayer<arm_compute::NEHashtableLookup>(
+    lookups, keys, values, output, hits);
+
+  _return_fn = acl_common::asAclFunction(std::move(lookup_fn));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/InstanceNorm.cc b/runtime/onert/backend/acl_neon/ops/InstanceNorm.cc
new file mode 100644
index 00000000000..45890802504
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/InstanceNorm.cc
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclActivationBuilder.h>
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+using ActivationBuilder =
+  acl_common::AclActivationBuilder<::arm_compute::ITensor, ::arm_compute::NEActivationLayer,
+                                   acl_common::AclFunction>;
+
+// InstanceNorm is always marked as supported by the validator.
+void Validator::visit(const ir::operation::InstanceNorm &) { _supported = true; }
+
+// Emits NEInstanceNormalizationLayer plus the activation; gamma and beta must be constant.
+void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
+{
+  const auto ofm_idx{node.getOutputs().at(0)};
+  const auto ifm_idx{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
+  const auto gamma_idx{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
+  const auto beta_idx{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
+
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);
+
+  if (!(_ctx.at(gamma_idx).isConstant() && _ctx.at(beta_idx).isConstant()))
+    throw std::runtime_error{"Non-constant gamma or beta for acl_neon backend InstanceNorm"};
+
+  const float gamma = _ctx.at(gamma_idx).asScalar<float>();
+  const float beta = _ctx.at(beta_idx).asScalar<float>();
+  auto norm = acl_common::generateLayer<arm_compute::NEInstanceNormalizationLayer>(
+    ifm_tensor->handle(), ofm_tensor->handle(), gamma, beta, node.param().epsilon);
+
+  _return_fn = std::make_unique<exec::FunctionSequence>(
+    acl_common::asAclFunction(std::move(norm)),
+    ActivationBuilder::generate(node.param().activation, ofm_tensor->handle()));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/L2Normalization.cc b/runtime/onert/backend/acl_neon/ops/L2Normalization.cc
new file mode 100644
index 00000000000..82034c6acc5
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/L2Normalization.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// L2Normalization is always marked as supported by the validator.
+void Validator::visit(const ir::operation::L2Normalization &) { _supported = true; }
+
+// Emits an NENormalizationLayer parameterised to act as L2 normalization over the last axis.
+void KernelGenerator::visit(const ir::operation::L2Normalization &node)
+{
+  const auto ofm_idx{node.getOutputs().at(0)};
+  const auto ifm_idx{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};
+
+  // {CL|Neon}L2Normalization performs the reduction only along dimension 0, whereas
+  // L2 Normalization always performs the reduction along the depth axis.
+  // Thus, {CL|Neon}NormalizationLayer is repurposed to act as a depthwise L2 normalization by
+  // choosing the normalization parameters below.
+
+  const auto &shape = _ctx.at(ifm_idx).shape();
+  // TODO Support optional constant dimension that normalization would be performed on
+  // normSize = depth(last dimension) * 2 + 1
+  const int32_t norm_size = 2 * shape.dim(shape.rank() - 1) + 1;
+  const float alpha = 1.0f; // In the implementation to make alpha_ become 1
+  const float beta = 0.5f;  // pow(reduction, -0.5) = 1 / sqrt(reduction)
+  const float bias = 0.0f;  // Don't offset the reduction.
+  const ::arm_compute::NormalizationLayerInfo norm_info(::arm_compute::NormType::CROSS_MAP,
+                                                        norm_size, alpha, beta, bias, false);
+
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);
+
+  auto l2norm = acl_common::generateLayer<arm_compute::NENormalizationLayer>(
+    ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
+
+  _return_fn = acl_common::asAclFunction(std::move(l2norm));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/LSTM.cc b/runtime/onert/backend/acl_neon/ops/LSTM.cc
new file mode 100644
index 00000000000..fe40a219c29
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/LSTM.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+void Validator::visit(const ir::operation::LSTM &) { _supported = true; }
+// Delegates kernel creation to acl_common::kernelGenLSTM, instantiated with NELSTMLayer.
+void KernelGenerator::visit(const ir::operation::LSTM &node)
+{
+  _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ITensor,
+                                         ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_reg);
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/LocalResponseNormalization.cc b/runtime/onert/backend/acl_neon/ops/LocalResponseNormalization.cc
new file mode 100644
index 00000000000..918a95888b4
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/LocalResponseNormalization.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// LocalResponseNormalization is always marked as supported by the validator.
+void Validator::visit(const ir::operation::LocalResponseNormalization &) { _supported = true; }
+
+// Emits a CROSS_MAP NENormalizationLayer set up from the node's radius/alpha/beta/bias.
+void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
+{
+  using Op = ir::operation::LocalResponseNormalization;
+  const auto ofm_idx{node.getOutputs().at(0)};
+  const auto ifm_idx{node.getInputs().at(Op::Input::INPUT)};
+
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);
+
+  // The normalization size handed to ACL is 2 * radius + 1
+  const auto &param = node.param();
+  const auto norm_size = param.radius * 2 + 1;
+  const ::arm_compute::NormalizationLayerInfo norm_info(
+    ::arm_compute::NormType::CROSS_MAP, norm_size, param.alpha, param.beta, param.bias, false);
+
+  auto lrn = acl_common::generateLayer<arm_compute::NENormalizationLayer>(
+    ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
+
+  _return_fn = acl_common::asAclFunction(std::move(lrn));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/OneHot.cc b/runtime/onert/backend/acl_neon/ops/OneHot.cc
new file mode 100644
index 00000000000..b54a4e727ad
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/OneHot.cc
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// OneHot is always marked as supported by the validator.
+void Validator::visit(const ir::operation::OneHot &) { _supported = true; }
+
+// Emits an NEOneHot layer; an axis of -1 is mapped to the last output dimension first.
+void KernelGenerator::visit(const ir::operation::OneHot &node)
+{
+  using In = ir::operation::OneHot::Input;
+  const auto &ins = node.getInputs();
+  const auto out_idx{node.getOutputs().at(0)};
+
+  auto output = _tensor_reg->getAclTensor(out_idx)->handle();
+  auto indices = _tensor_reg->getAclTensor(ins.at(In::INDICES))->handle();
+  auto depth = _tensor_reg->getAclTensor(ins.at(In::DEPTH))->handle();
+  auto on_value = _tensor_reg->getAclTensor(ins.at(In::ON_VALUE))->handle();
+  auto off_value = _tensor_reg->getAclTensor(ins.at(In::OFF_VALUE))->handle();
+
+  // Only -1 is remapped here; any other axis value goes to the ACL conversion unchanged
+  const size_t out_rank = _ctx.at(out_idx).shape().rank();
+  int32_t axis = node.param().axis == -1 ? out_rank - 1 : node.param().axis;
+  axis = acl_common::ToARMComputeAxis(out_rank, axis).value();
+
+  auto one_hot = acl_common::generateLayer<arm_compute::NEOneHot>(
+    indices, depth, on_value, off_value, output, axis);
+  _return_fn = acl_common::asAclFunction(std::move(one_hot));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/PReLU.cc b/runtime/onert/backend/acl_neon/ops/PReLU.cc
new file mode 100644
index 00000000000..d1047e3246f
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/PReLU.cc
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// PReLU is always marked as supported by the validator.
+void Validator::visit(const ir::operation::PReLU &) { _supported = true; }
+
+// Emits an NEPReluLayer built from the node's INPUT and ALPHA inputs and output 0.
+void KernelGenerator::visit(const ir::operation::PReLU &node)
+{
+  using In = ir::operation::PReLU::Input;
+
+  auto ofm = _tensor_reg->getAclTensor(node.getOutputs().at(0))->handle();
+  auto ifm = _tensor_reg->getAclTensor(node.getInputs().at(In::INPUT))->handle();
+  auto alpha = _tensor_reg->getAclTensor(node.getInputs().at(In::ALPHA))->handle();
+
+  // Layer argument order: input, alpha, output
+  auto prelu = acl_common::generateLayer<arm_compute::NEPReluLayer>(ifm, alpha, ofm);
+
+  _return_fn = acl_common::asAclFunction(std::move(prelu));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/Pack.cc b/runtime/onert/backend/acl_neon/ops/Pack.cc
new file mode 100644
index 00000000000..ffb0de3a7cf
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/Pack.cc
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Pack is always marked as supported by the validator.
+void Validator::visit(const ir::operation::Pack &) { _supported = true; }
+
+// Emits an NEStackLayer over all node inputs. dim_correction is disabled on the inputs it was
+// applied to while the layer is generated, then re-enabled where dimension(0) is 1.
+void KernelGenerator::visit(const ir::operation::Pack &node)
+{
+  const auto out_idx{node.getOutputs().at(0)};
+  const auto out_rank = _ctx.at(out_idx).shape().rank();
+  const auto &in_indexes = node.getInputs();
+
+  auto output = _tensor_reg->getAclTensor(out_idx)->handle();
+  std::vector<arm_compute::ITensor *> inputs;
+  for (const auto &in_idx : in_indexes)
+    inputs.emplace_back(_tensor_reg->getAclTensor(in_idx)->handle());
+
+  // Wrap a negative axis by the output rank, then convert it to the ACL axis
+  auto axis{node.param().axis};
+  if (axis < 0)
+    axis += out_rank;
+  axis = acl_common::ToARMComputeAxis(out_rank, axis).value();
+
+  // Disable applied dim_correction
+  for (const auto &in_idx : in_indexes)
+  {
+    const auto &in_tensor = _tensor_reg->getAclTensor(in_idx);
+    // A rank mismatch means a high dimension of 1 was dropped by dim_correction
+    if (in_tensor->num_dimensions() != in_tensor->info()->num_dimensions())
+    {
+      acl_common::disableDimCorrection(in_tensor);
+    }
+  }
+
+  auto stack = acl_common::generateLayer<arm_compute::NEStackLayer>(inputs, axis, output);
+
+  // Revert disabling applied dim_correction
+  for (const auto &in_idx : in_indexes)
+  {
+    const auto &in_tensor = _tensor_reg->getAclTensor(in_idx);
+    if (in_tensor->dimension(0) == 1)
+    {
+      acl_common::enableDimCorrection(in_tensor);
+    }
+  }
+
+  _return_fn = acl_common::asAclFunction(std::move(stack));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/Pad.cc b/runtime/onert/backend/acl_neon/ops/Pad.cc
new file mode 100644
index 00000000000..3905a7a3196
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/Pad.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Pad is always marked as supported by the validator.
+void Validator::visit(const ir::operation::Pad &) { _supported = true; }
+
+// Emits an NEPadLayer filling with the value 0. The pad operand's data is read while the kernel
+// is generated, as two consecutive int32 values per IR dimension.
+void KernelGenerator::visit(const ir::operation::Pad &node)
+{
+  const auto input_idx{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
+  const auto pad_idx{node.getInputs().at(ir::operation::Pad::Input::PAD)};
+  const auto output_idx{node.getOutputs().at(0)};
+  assert(_ctx.at(pad_idx).data());
+
+  auto input = _tensor_reg->getAclTensor(input_idx)->handle();
+  auto output = _tensor_reg->getAclTensor(output_idx)->handle();
+
+  // The pair read for IR dimension `dim` is stored at the converted ACL axis
+  const auto rank = _ctx.at(input_idx).shape().rank();
+  const auto *pads = reinterpret_cast<const int32_t *>(_ctx.at(pad_idx).data()->base());
+  ::arm_compute::PaddingList padding_list(rank);
+  for (int32_t dim = 0; dim < rank; ++dim)
+  {
+    const auto acl_dim = acl_common::ToARMComputeAxis(rank, dim).value();
+    padding_list[acl_dim] = ::arm_compute::PaddingInfo{pads[2 * dim], pads[2 * dim + 1]};
+  }
+
+  [[maybe_unused]] const auto input_type = _ctx.at(input_idx).typeInfo();
+  assert(input->info()->data_type() == acl_common::asDataType(input_type.type()));
+  assert(input->info()->quantization_info() ==
+         ::arm_compute::QuantizationInfo(input_type.scale(), input_type.zero_point()));
+  const ::arm_compute::PixelValue pixel_value(0, input->info()->data_type(),
+                                              input->info()->quantization_info());
+
+  auto pad_fn =
+    acl_common::generateLayer<arm_compute::NEPadLayer>(input, output, padding_list, pixel_value);
+
+  _return_fn = acl_common::asAclFunction(std::move(pad_fn));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/Pool2D.cc b/runtime/onert/backend/acl_neon/ops/Pool2D.cc
new file mode 100644
index 00000000000..35a920a156c
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/Pool2D.cc
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclActivationBuilder.h>
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+using ActivationBuilder =
+  acl_common::AclActivationBuilder<::arm_compute::ITensor, ::arm_compute::NEActivationLayer,
+                                   acl_common::AclFunction>;
+
+// Pool2D is always marked as supported by the validator.
+void Validator::visit(const ir::operation::Pool2D &) { _supported = true; }
+
+// Emits the shared ACL Pool2D kernel (as NEPoolingLayer) followed by the node's activation.
+void KernelGenerator::visit(const ir::operation::Pool2D &node)
+{
+  auto pool = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
+    node, _ctx, _tensor_reg, acl_common::convertPoolType(node.param().op_type));
+  const auto act = node.param().activation;
+  auto ofm = _tensor_reg->getAclTensor(node.getOutputs().at(0))->handle();
+
+  _return_fn = std::make_unique<exec::FunctionSequence>(
+    acl_common::asAclFunction(std::move(pool)), ActivationBuilder::generate(act, ofm));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/RNN.cc b/runtime/onert/backend/acl_neon/ops/RNN.cc
new file mode 100644
index 00000000000..9aad1f15583
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/RNN.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// RNN is always marked as supported by the validator.
+void Validator::visit(const ir::operation::RNN &) { _supported = true; }
+
+// Builds the RNN kernel as a two-step sequence: NECopy from HIDDEN_STATE_IN to HIDDEN_STATE_OUT,
+// then NERNNLayer, which receives HIDDEN_STATE_OUT as its hidden-state tensor.
+void KernelGenerator::visit(const ir::operation::RNN &node)
+{
+  using Op = ir::operation::RNN;
+  const auto output_index{node.getOutputs().at(Op::Output::OUTPUT)};
+  const auto hidden_state_out_index{node.getOutputs().at(Op::Output::HIDDEN_STATE_OUT)};
+
+  const auto input_index{node.getInputs().at(Op::Input::INPUT)};
+  const auto weights_index{node.getInputs().at(Op::Input::WEIGHTS)};
+  const auto recurrent_weights_index{node.getInputs().at(Op::Input::RECURRENT_WEIGHTS)};
+  const auto bias_index{node.getInputs().at(Op::Input::BIAS)};
+  const auto hidden_state_in_index{node.getInputs().at(Op::Input::HIDDEN_STATE_IN)};
+
+  auto output_tensor = _tensor_reg->getAclTensor(output_index);
+  auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index);
+  auto input_tensor = _tensor_reg->getAclTensor(input_index);
+  auto weights_tensor = _tensor_reg->getAclTensor(weights_index);
+  auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index);
+  auto bias_tensor = _tensor_reg->getAclTensor(bias_index);
+  auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index);
+  auto act_info = acl_common::asActivationLayerInfo(node.param().activation);
+
+  auto copy_layer = acl_common::generateLayer<arm_compute::NECopy>(
+    hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::NERNNLayer>(
+    _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+    weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
+    hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
+
+  // Return both steps as one sequence: assigning _return_fn twice would discard the copy
+  _return_fn = std::make_unique<exec::FunctionSequence>(
+    acl_common::asAclFunction(std::move(copy_layer)), acl_common::asAclFunction(std::move(fn)));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/Reduce.cc b/runtime/onert/backend/acl_neon/ops/Reduce.cc
new file mode 100644
index 00000000000..d5ba9668fe7
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/Reduce.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Marks every Reduce node as supported; no per-node checks are made.
+void Validator::visit(const ir::operation::Reduce &) { _supported = true; }
+
+// Creates the NEON reduction layer matching the node's reduce type and stores it in _return_fn.
+void KernelGenerator::visit(const ir::operation::Reduce &node)
+{
+  const auto in_idx{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
+  const auto axes_idx{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
+  const auto out_idx{node.getOutputs().at(0)};
+
+  auto out_tensor = _tensor_reg->getAclTensor(out_idx);
+  auto in_tensor = _tensor_reg->getAclTensor(in_idx);
+
+  // The conversion to ACL axes takes negative values and possible duplicates into account.
+  const auto rank = _ctx.at(in_idx).shape().rank();
+  const auto acl_axes = acl_common::asCoordinates(_ctx.at(axes_idx), rank);
+  const auto keep = node.param().keep_dims;
+
+  // MEAN and SUM have dedicated layers; every other type goes through NEReduceOperation.
+  std::unique_ptr<::arm_compute::IFunction> fn;
+  switch (node.param().reduce_type)
+  {
+    case ir::operation::Reduce::ReduceType::MEAN:
+      fn = acl_common::generateLayer<arm_compute::NEReduceMean>(in_tensor->handle(), acl_axes,
+                                                                keep, out_tensor->handle());
+      break;
+    case ir::operation::Reduce::ReduceType::SUM:
+      fn = acl_common::generateLayer<arm_compute::NEReduceSum>(in_tensor->handle(), acl_axes,
+                                                               keep, out_tensor->handle());
+      break;
+    default:
+      fn = acl_common::generateLayer<arm_compute::NEReduceOperation>(
+        in_tensor->handle(), acl_axes, keep, out_tensor->handle(),
+        acl_common::convertReduceType(node.param().reduce_type));
+  }
+  _return_fn = acl_common::asAclFunction(std::move(fn));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/Reshape.cc b/runtime/onert/backend/acl_neon/ops/Reshape.cc
new file mode 100644
index 00000000000..72324e3694f
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/Reshape.cc
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Marks every Reshape node as supported; no per-node checks are made.
+void Validator::visit(const ir::operation::Reshape &) { _supported = true; }
+
+// Wraps an NEReshapeLayer (input -> output) as the function returned for a Reshape node.
+void KernelGenerator::visit(const ir::operation::Reshape &node)
+{
+  const auto in_idx{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
+  const auto out_idx{node.getOutputs().at(0)};
+
+  auto dst = _tensor_reg->getAclTensor(out_idx);
+  auto src = _tensor_reg->getAclTensor(in_idx);
+
+  _return_fn = acl_common::asAclFunction(
+    acl_common::generateLayer<arm_compute::NEReshapeLayer>(src->handle(), dst->handle()));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/ResizeBilinear.cc b/runtime/onert/backend/acl_neon/ops/ResizeBilinear.cc
new file mode 100644
index 00000000000..8b1ec2fc452
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/ResizeBilinear.cc
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Marks every ResizeBilinear node as supported; no per-node checks are made.
+void Validator::visit(const ir::operation::ResizeBilinear &) { _supported = true; }
+
+// Creates an NEScale function with bilinear interpolation for a ResizeBilinear node.
+void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
+{
+  const auto in_idx{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
+  const auto out_idx{node.getOutputs().at(0)};
+
+  auto dst = _tensor_reg->getAclTensor(out_idx);
+  auto src = _tensor_reg->getAclTensor(in_idx);
+
+  const arm_compute::ScaleKernelInfo scale_info{
+    arm_compute::InterpolationPolicy::BILINEAR, arm_compute::BorderMode::REPLICATE,
+    arm_compute::PixelValue(0.f), arm_compute::SamplingPolicy::TOP_LEFT, false /*use padding*/};
+
+  _return_fn = acl_common::asAclFunction(
+    acl_common::generateLayer<arm_compute::NEScale>(src->handle(), dst->handle(), scale_info));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/Slice.cc b/runtime/onert/backend/acl_neon/ops/Slice.cc
new file mode 100644
index 00000000000..d0488b9d468
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/Slice.cc
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Marks every Slice node as supported; no per-node checks are made.
+void Validator::visit(const ir::operation::Slice &) { _supported = true; }
+
+// Generates an NESlice function for a Slice node.
+//
+// BEGINS and SIZES are read from the operands' data at kernel-generation time as INT32 values,
+// one per input dimension. For each dimension the start is `begin` and the end is
+// `begin + size`, stored at the ACL axis that ToARMComputeAxis() returns for that dimension.
+void KernelGenerator::visit(const ir::operation::Slice &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
+  const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
+  const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
+
+  auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
+  auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
+
+  // Set initializers for indices data such as order of inputData
+  const int input_rank = _ctx.at(input_index).shape().rank();
+  std::vector<int32_t> starts(input_rank, 0);
+  std::vector<int32_t> ends(input_rank, 0);
+  {
+    const auto &begins_operand = _ctx.at(begins_index);
+    const auto &sizes_operand = _ctx.at(sizes_index);
+
+    assert(begins_operand.typeInfo().type() == ir::DataType::INT32);
+    assert(sizes_operand.typeInfo().type() == ir::DataType::INT32);
+    assert(static_cast<int>(begins_operand.shape().num_elements()) == input_rank);
+    assert(static_cast<int>(sizes_operand.shape().num_elements()) == input_rank);
+
+    const auto *begin_data = reinterpret_cast<const int32_t *>(begins_operand.data()->base());
+    const auto *size_data = reinterpret_cast<const int32_t *>(sizes_operand.data()->base());
+    // Both buffers are dereferenced in the loop below, so both are checked.
+    assert(begin_data != nullptr);
+    assert(size_data != nullptr);
+
+    for (int n = 0; n < input_rank; ++n)
+    {
+      const auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n).value();
+      starts[axis] = begin_data[n];
+      ends[axis] = begin_data[n] + size_data[n];
+    }
+  }
+
+  ::arm_compute::Coordinates starts_set;
+  ::arm_compute::Coordinates ends_set;
+  for (size_t i = 0; i < starts.size(); ++i)
+  {
+    starts_set.set(i, starts[i]);
+    ends_set.set(i, ends[i]);
+  }
+
+  auto fn = acl_common::generateLayer<arm_compute::NESlice>(
+    inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
+
+  _return_fn = acl_common::asAclFunction(std::move(fn));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/Softmax.cc b/runtime/onert/backend/acl_neon/ops/Softmax.cc
new file mode 100644
index 00000000000..e432f3e0610
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/Softmax.cc
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Marks every Softmax node as supported; no per-node checks are made.
+void Validator::visit(const ir::operation::Softmax &) { _supported = true; }
+
+// Creates an NESoftmaxLayer function, scaled by the node's beta, for a Softmax node.
+void KernelGenerator::visit(const ir::operation::Softmax &node)
+{
+  const auto in_idx{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
+  const auto out_idx{node.getOutputs().at(0)};
+  const auto beta = node.param().beta;
+
+  auto dst = _tensor_reg->getAclTensor(out_idx);
+  auto src = _tensor_reg->getAclTensor(in_idx);
+
+  // NOTE NESoftmaxLayer's default axis is -1
+  _return_fn = acl_common::asAclFunction(acl_common::generateLayer<arm_compute::NESoftmaxLayer>(
+    _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), src->handle(), dst->handle(),
+    beta));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/SpaceToBatchND.cc b/runtime/onert/backend/acl_neon/ops/SpaceToBatchND.cc
new file mode 100644
index 00000000000..c9c8b8754be
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/SpaceToBatchND.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Marks every SpaceToBatchND node as supported; no per-node checks are made.
+void Validator::visit(const ir::operation::SpaceToBatchND &) { _supported = true; }
+
+// Builds NESpaceToBatchLayer for SpaceToBatchND; BLOCK_SIZE and PADDINGS are asserted to have data.
+void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
+{
+  using Input = ir::operation::SpaceToBatchND::Input;
+  const auto in_idx{node.getInputs().at(Input::INPUT)};
+  const auto block_idx{node.getInputs().at(Input::BLOCK_SIZE)};
+  const auto pad_idx{node.getInputs().at(Input::PADDINGS)};
+  const auto out_idx{node.getOutputs().at(0)};
+
+  auto dst = _tensor_reg->getAclTensor(out_idx);
+  auto src = _tensor_reg->getAclTensor(in_idx);
+  auto block = _tensor_reg->getAclTensor(block_idx);
+  auto pad = _tensor_reg->getAclTensor(pad_idx);
+
+  assert(_ctx.at(block_idx).data());
+  assert(_ctx.at(pad_idx).data());
+
+  auto fn = acl_common::generateLayer<arm_compute::NESpaceToBatchLayer>(
+    src->handle(), block->handle(), pad->handle(), dst->handle());
+  _return_fn = acl_common::asAclFunction(std::move(fn));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/SpaceToDepth.cc b/runtime/onert/backend/acl_neon/ops/SpaceToDepth.cc
new file mode 100644
index 00000000000..0677264fcdf
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/SpaceToDepth.cc
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Marks every SpaceToDepth node as supported; no per-node checks are made.
+void Validator::visit(const ir::operation::SpaceToDepth &) { _supported = true; }
+
+// Creates an NESpaceToDepthLayer function using the node's block size.
+void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
+{
+  const auto in_idx{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
+  const auto out_idx{node.getOutputs().at(0)};
+  auto block = node.param().block_size;
+
+  auto dst = _tensor_reg->getAclTensor(out_idx);
+  auto src = _tensor_reg->getAclTensor(in_idx);
+
+  _return_fn = acl_common::asAclFunction(
+    acl_common::generateLayer<arm_compute::NESpaceToDepthLayer>(src->handle(), dst->handle(),
+                                                                block));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/Split.cc b/runtime/onert/backend/acl_neon/ops/Split.cc
new file mode 100644
index 00000000000..4a1146246ca
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/Split.cc
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Marks every Split node as supported; no per-node checks are made.
+void Validator::visit(const ir::operation::Split &) { _supported = true; }
+
+// Creates an NESplit function that splits the input along the node's AXIS operand.
+//
+// Only a constant AXIS operand is handled; otherwise std::runtime_error is thrown.
+void KernelGenerator::visit(const ir::operation::Split &node)
+{
+  // TODO Support this op by SubTensor
+  const auto in_idx{node.getInputs().at(ir::operation::Split::Input::INPUT)};
+  const auto axis_idx{node.getInputs().at(ir::operation::Split::Input::AXIS)};
+
+  assert(node.param().num_splits == static_cast<int>(node.getOutputs().size()));
+  if (!_ctx.at(axis_idx).isConstant())
+  {
+    throw std::runtime_error("Non-constant axis_index NYI for acl_neon backend");
+  }
+
+  auto src = _tensor_reg->getAclTensor(in_idx);
+  std::vector<arm_compute::ITensor *> dsts;
+  for (const auto &out_idx : node.getOutputs())
+    dsts.emplace_back(_tensor_reg->getAclTensor(out_idx)->handle());
+
+  // A negative axis is wrapped by the input rank before conversion to the ACL axis.
+  const auto rank = _ctx.at(in_idx).shape().rank();
+  auto axis = _ctx.at(axis_idx).asScalar<int32_t>();
+  if (axis < 0)
+    axis += rank;
+  axis = acl_common::ToARMComputeAxis(rank, axis).value();
+
+  auto fn = acl_common::generateLayer<arm_compute::NESplit>(src->handle(), dsts, axis);
+
+  _return_fn = acl_common::asAclFunction(std::move(fn));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/SquaredDifference.cc b/runtime/onert/backend/acl_neon/ops/SquaredDifference.cc
new file mode 100644
index 00000000000..7a46db73999
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/SquaredDifference.cc
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Marks every SquaredDifference node as supported; no per-node checks are made.
+void Validator::visit(const ir::operation::SquaredDifference &) { _supported = true; }
+
+// Creates an NEElementwiseSquaredDiff function over the node's LHS and RHS inputs.
+void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
+{
+  using Input = ir::operation::SquaredDifference::Input;
+
+  auto dst = _tensor_reg->getAclTensor(node.getOutputs().at(0));
+  auto lhs = _tensor_reg->getAclTensor(node.getInputs().at(Input::LHS));
+  auto rhs = _tensor_reg->getAclTensor(node.getInputs().at(Input::RHS));
+
+  auto fn = acl_common::generateLayer<arm_compute::NEElementwiseSquaredDiff>(
+    lhs->handle(), rhs->handle(), dst->handle());
+
+  _return_fn = acl_common::asAclFunction(std::move(fn));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/Squeeze.cc b/runtime/onert/backend/acl_neon/ops/Squeeze.cc
new file mode 100644
index 00000000000..630777fcb76
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/Squeeze.cc
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Marks every Squeeze node as supported; no per-node checks are made.
+void Validator::visit(const ir::operation::Squeeze &) { _supported = true; }
+
+// Generates the function for a Squeeze node as a plain NEReshapeLayer from input to output.
+//
+// Squeeze is identical to reshape except that it has an optional dimensions input.
+// That input is ignored since the output tensor already has the squeezed shape by freezer and
+// toco, so the node's dims/ndim parameters are deliberately not read here.
+void KernelGenerator::visit(const ir::operation::Squeeze &node)
+{
+  const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
+  const auto output_index{node.getOutputs().at(0)};
+
+  auto output_tensor = _tensor_reg->getAclTensor(output_index);
+  auto input_tensor = _tensor_reg->getAclTensor(input_index);
+
+  auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());
+  _return_fn = acl_common::asAclFunction(std::move(fn));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/StridedSlice.cc b/runtime/onert/backend/acl_neon/ops/StridedSlice.cc
new file mode 100644
index 00000000000..7b7760fcb21
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/StridedSlice.cc
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Marks every StridedSlice node as supported; no per-node checks are made.
+void Validator::visit(const ir::operation::StridedSlice &) { _supported = true; }
+
+// Generates an NEStridedSlice function for a StridedSlice node.
+//
+// STARTS, ENDS and STRIDES are read from the operands' data at kernel-generation time as INT32
+// values, one per input dimension, and stored at the ACL axis that ToARMComputeAxis() returns
+// for each dimension. The begin/end/shrink-axis masks are reordered with ReorderBits().
+// Dim correction on the input may be disabled while the layer is generated and re-enabled after.
+void KernelGenerator::visit(const ir::operation::StridedSlice &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
+  const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
+  const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
+  const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
+
+  auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
+  auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
+
+  // Set initializers for indices data such as order of inputData
+  const int input_rank = _ctx.at(input_index).shape().rank();
+  std::vector<int32_t> starts(input_rank, 0);
+  std::vector<int32_t> ends(input_rank, 0);
+  std::vector<int32_t> strides(input_rank, 0);
+  {
+    const auto &starts_operand = _ctx.at(starts_index);
+    const auto &ends_operand = _ctx.at(ends_index);
+    const auto &strides_operand = _ctx.at(strides_index);
+
+    assert(starts_operand.typeInfo().type() == ir::DataType::INT32);
+    assert(ends_operand.typeInfo().type() == ir::DataType::INT32);
+    assert(strides_operand.typeInfo().type() == ir::DataType::INT32);
+    assert(static_cast<int>(starts_operand.shape().num_elements()) == input_rank);
+    assert(static_cast<int>(ends_operand.shape().num_elements()) == input_rank);
+    assert(static_cast<int>(strides_operand.shape().num_elements()) == input_rank);
+
+    const auto *start_data = reinterpret_cast<const int32_t *>(starts_operand.data()->base());
+    const auto *end_data = reinterpret_cast<const int32_t *>(ends_operand.data()->base());
+    const auto *stride_data = reinterpret_cast<const int32_t *>(strides_operand.data()->base());
+    // All three buffers are dereferenced in the loop below, so all three are checked.
+    assert(start_data != nullptr);
+    assert(end_data != nullptr);
+    assert(stride_data != nullptr);
+
+    for (int n = 0; n < input_rank; ++n)
+    {
+      const auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n).value();
+      starts[axis] = start_data[n];
+      ends[axis] = end_data[n];
+      strides[axis] = stride_data[n];
+    }
+  }
+
+  // Set mask bits such as order of inputData
+  const auto begin_mask = acl_common::ReorderBits<int32_t>(node.param().begin_mask, input_rank);
+  const auto end_mask = acl_common::ReorderBits<int32_t>(node.param().end_mask, input_rank);
+  const auto shrink_axis_mask =
+    acl_common::ReorderBits<int32_t>(node.param().shrink_axis_mask, input_rank);
+
+  ::arm_compute::Coordinates starts_set;
+  ::arm_compute::Coordinates ends_set;
+  ::arm_compute::BiStrides strides_set;
+
+  for (size_t i = 0; i < starts.size(); ++i)
+  {
+    starts_set.set(i, starts[i]);
+    ends_set.set(i, ends[i]);
+    strides_set.set(i, strides[i]);
+  }
+
+  // Disable applied dim_correction
+  if (static_cast<size_t>(inputData_tensor->getShape().rank()) !=
+      inputData_tensor->info()->num_dimensions())
+  {
+    // This means that high dimension's value is 1 and input tensor is applied dim_correction
+    acl_common::disableDimCorrection(inputData_tensor);
+  }
+
+  auto fn = acl_common::generateLayer<arm_compute::NEStridedSlice>(
+    inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
+    begin_mask, end_mask, shrink_axis_mask);
+
+  // Revert disabling applied dim_correction
+  if (inputData_tensor->getShape().dim(0) == 1)
+  {
+    acl_common::enableDimCorrection(inputData_tensor);
+  }
+
+  _return_fn = acl_common::asAclFunction(std::move(fn));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/Transpose.cc b/runtime/onert/backend/acl_neon/ops/Transpose.cc
new file mode 100644
index 00000000000..6e912f474df
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/Transpose.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Marks every Transpose node as supported; no per-node checks are made.
+void Validator::visit(const ir::operation::Transpose &) { _supported = true; }
+
+// Generates NECopy, NETranspose or NEPermute for a Transpose node based on rank and permutation.
+void KernelGenerator::visit(const ir::operation::Transpose &node)
+{
+  const auto ofm_idx{node.getOutputs().at(0)};
+  const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
+  const auto perm_idx{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
+
+  auto dst = _tensor_reg->getAclTensor(ofm_idx);
+  const auto src = _tensor_reg->getAclTensor(ifm_idx);
+  const auto rank = _ctx.at(ifm_idx).shape().rank();
+
+  std::vector<int32_t> pv;
+  if (_ctx.at(perm_idx).shape() == ir::Shape{0})
+  {
+    // An empty permutation operand selects the reversed dimension order.
+    for (int32_t dim = rank - 1; dim >= 0; --dim)
+      pv.push_back(dim);
+  }
+  else
+  {
+    pv = _ctx.at(perm_idx).asVector<int32_t>();
+  }
+
+  // A rank-2 identity permutation is a plain copy; NETranspose only fits the {1, 0} case.
+  const bool rank2_identity = rank == 2 && pv.size() == 2 && pv[0] == 0 && pv[1] == 1;
+  std::unique_ptr<arm_compute::IFunction> fn;
+  if (rank == 1 || rank2_identity)
+  {
+    fn = acl_common::generateLayer<arm_compute::NECopy>(src->handle(), dst->handle());
+  }
+  else if (rank == 2)
+  {
+    assert(pv.size() == 2 && pv.at(0) == 1 && pv.at(1) == 0);
+    fn = acl_common::generateLayer<arm_compute::NETranspose>(src->handle(), dst->handle());
+  }
+  else
+  {
+    auto acl_pv = acl_common::getARMComputePermutationVector(rank, pv);
+    fn = acl_common::generateLayer<arm_compute::NEPermute>(src->handle(), dst->handle(), acl_pv);
+  }
+  _return_fn = acl_common::asAclFunction(std::move(fn));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/TransposeConv.cc b/runtime/onert/backend/acl_neon/ops/TransposeConv.cc
new file mode 100644
index 00000000000..641537ea371
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/TransposeConv.cc
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+// Marks every TransposeConv node as supported; no per-node checks are made.
+void Validator::visit(const ir::operation::TransposeConv &) { _supported = true; }
+
+// Creates an NEDeconvolutionLayer function for a TransposeConv node.
+void KernelGenerator::visit(const ir::operation::TransposeConv &node)
+{
+  const auto out_idx{node.getOutputs().at(0)};
+  const auto kernel_idx{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
+  const auto in_idx{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
+
+  const auto out_shape = _ctx.at(out_idx).shape().asFeature();
+  const auto in_shape = _ctx.at(in_idx).shape().asFeature();
+  const auto kernel_shape = _ctx.at(kernel_idx).shape().asFeature();
+
+  const auto &param = node.param();
+  assert((param.padding.type == ir::PaddingType::SAME) ||
+         (param.padding.type == ir::PaddingType::VALID));
+  // Padding is computed from the output/input feature shapes, stride and kernel width/height.
+  auto padding = ir::calculatePadding(param.padding, out_shape, in_shape, param.stride,
+                                      kernel_shape.W, kernel_shape.H);
+  const auto conv_info = acl_common::asPadStrideInfo(padding, param.stride);
+
+  auto dst = _tensor_reg->getAclTensor(out_idx);
+  auto src = _tensor_reg->getAclTensor(in_idx);
+  auto kernel = _tensor_reg->getAclTensor(kernel_idx);
+
+  auto fn = acl_common::generateLayer<arm_compute::NEDeconvolutionLayer>(
+    src->handle(), kernel->handle(), nullptr, dst->handle(), conv_info);
+  _return_fn = acl_common::asAclFunction(std::move(fn));
+}
+
+} // namespace onert::backend::acl_neon
diff --git a/runtime/onert/backend/acl_neon/ops/Unpack.cc b/runtime/onert/backend/acl_neon/ops/Unpack.cc
new file mode 100644
index 00000000000..86e13ffc619
--- /dev/null
+++ b/runtime/onert/backend/acl_neon/ops/Unpack.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../KernelGenerator.h"
+#include "../Validator.h"
+
+#include <AclKernelGen.h>
+
+namespace onert::backend::acl_neon
+{
+
+void Validator::visit(const ir::operation::Unpack &) { _supported = true; } // no checks
+
+// Emits the ACL NEON kernel for Unpack: a single arm_compute::NEUnstack layer configured with
+// the handles of all output operands of the node.
+void KernelGenerator::visit(const ir::operation::Unpack &node)
+{
+  const auto input_idx = node.getInputs().at(ir::operation::Unpack::Input::INPUT);
+  const auto rank = _ctx.at(input_idx).shape().rank();
+
+  auto input = _tensor_reg->getAclTensor(input_idx);
+
+  // Gather the ACL handle of every output operand, in the node's output order.
+  std::vector<arm_compute::ITensor *> out_handles;
+  for (const auto &output_idx : node.getOutputs())
+    out_handles.emplace_back(_tensor_reg->getAclTensor(output_idx)->handle());
+
+  // A negative axis is first shifted by the input rank; the result is then translated by
+  // acl_common::ToARMComputeAxis.
+  auto unpack_axis = node.param().axis;
+  unpack_axis = (unpack_axis < 0) ? unpack_axis + rank : unpack_axis;
+  unpack_axis = acl_common::ToARMComputeAxis(rank, unpack_axis).value();
+
+  // A rank that differs from ACL's dimension count means dim_correction was applied to the
+  // input (its high dimension is 1), so it is switched off before the layer is configured.
+  const bool dim_corrected =
+    static_cast<size_t>(input->getShape().rank()) != input->info()->num_dimensions();
+  if (dim_corrected)
+    acl_common::disableDimCorrection(input);
+
+  // The layer receives the input handle, the output handles and the converted axis.
+  auto layer =
+    acl_common::generateLayer<arm_compute::NEUnstack>(input->handle(), out_handles, unpack_axis);
+
+  // Re-enable dim_correction when the first dimension of the input shape is 1.
+  // NOTE(review): this condition is not the same as the one guarding the disable above;
+  // confirm that the two are meant to be equivalent.
+  if (input->getShape().dim(0) == 1)
+    acl_common::enableDimCorrection(input);
+
+  _return_fn = acl_common::asAclFunction(std::move(layer));
+}
+
+} // namespace onert::backend::acl_neon