From 01e3a624c950c1f9bfec1183f9244c131905a9c9 Mon Sep 17 00:00:00 2001 From: Hyeongseok Oh Date: Thu, 17 Apr 2025 10:27:49 +0900 Subject: [PATCH] [onert] Remove Einsum support This commit removes einsum support in onert and tests. ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh --- .../cker/include/cker/operation/Einsum.h | 936 ------------------ .../include/cker/operation/FusedBatchNorm.h | 3 +- .../include/cker/operation/Helper/BCast.h | 6 - .../cker/operation/Helper/MatmulBCast.h | 95 -- runtime/onert/backend/cpu/KernelGenerator.cc | 19 - runtime/onert/backend/cpu/KernelGenerator.h | 1 - runtime/onert/backend/cpu/ops/EinsumLayer.cc | 74 -- runtime/onert/backend/cpu/ops/EinsumLayer.h | 60 -- .../core/include/ir/Operations.Include.h | 1 - runtime/onert/core/include/ir/Operations.lst | 1 - .../onert/core/include/ir/operation/Einsum.h | 51 - runtime/onert/core/src/ir/operation/Einsum.cc | 31 - .../operation/UntrainableOperation.test.cc | 17 - runtime/onert/core/src/loader/BaseLoader.h | 26 - runtime/tests/nnapi/CMakeLists.txt | 2 +- .../nnapi/bridge/include/NeuralNetworksEx.h | 15 +- .../nnapi/bridge/wrapper/OperationFactory.cc | 23 - .../nnapi_gtest.skip.aarch64-android.acl_cl | 5 - .../nnapi_gtest.skip.aarch64-android.acl_neon | 5 - .../nnapi_gtest.skip.aarch64-linux.acl_cl | 5 - .../nnapi_gtest.skip.aarch64-linux.acl_neon | 5 - .../nnapi_gtest.skip.armv7l-linux.acl_cl | 5 - .../nnapi_gtest.skip.armv7l-linux.acl_neon | 5 - .../nnapi/specs/Ex/einsum_ex_float.mod.py | 154 --- 24 files changed, 4 insertions(+), 1541 deletions(-) delete mode 100644 runtime/compute/cker/include/cker/operation/Einsum.h delete mode 100644 runtime/compute/cker/include/cker/operation/Helper/MatmulBCast.h delete mode 100644 runtime/onert/backend/cpu/ops/EinsumLayer.cc delete mode 100644 runtime/onert/backend/cpu/ops/EinsumLayer.h delete mode 100644 runtime/onert/core/include/ir/operation/Einsum.h delete mode 100644 runtime/onert/core/src/ir/operation/Einsum.cc delete mode 100644 
runtime/tests/nnapi/specs/Ex/einsum_ex_float.mod.py diff --git a/runtime/compute/cker/include/cker/operation/Einsum.h b/runtime/compute/cker/include/cker/operation/Einsum.h deleted file mode 100644 index c6ceebccb91..00000000000 --- a/runtime/compute/cker/include/cker/operation/Einsum.h +++ /dev/null @@ -1,936 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __NNFW_CKER_EINSUM_H__ -#define __NNFW_CKER_EINSUM_H__ - -#include "cker/Types.h" -#include "cker/Shape.h" -#include "cker/Utils.h" - -#include "cker/operation/Helper/Tensor.h" -#include "cker/operation/Helper/MatmulBCast.h" - -#include "Transpose.h" -#include "BatchMatMul.h" - -#include -#include -#include -#include -#include - -namespace nnfw -{ -namespace cker -{ - -namespace functor -{ - -template struct StrideFunctor -{ - void operator()(const Device &d, typename TTypes::ConstTensor input, - const std::vector &strides, typename TTypes::Tensor output) - { - - Eigen::DSizes dsizes; - for (size_t d = 0; d < strides.size(); d++) - { - dsizes[d] = static_cast(strides[d]); - } - for (size_t d = strides.size(); d < N; d++) - { - dsizes[d] = 1; - } - - output.device(d) = input.stride(dsizes); - } -}; - -template struct InflateFunctor -{ - void operator()(const Device &d, typename TTypes::ConstTensor input, - const std::vector &strides, typename TTypes::Tensor output) - { - - Eigen::DSizes dsizes; - for (size_t d = 0; d < strides.size(); d++) - { - dsizes[d] = static_cast(strides[d]); - } - for (size_t d = strides.size(); d < N; d++) - { - dsizes[d] = 1; - } - - output.device(d) = input.inflate(dsizes); - } -}; - -template struct ReduceFunctor -{ - template - static void Reduce(const Device &d, OUT_T out, IN_T in, const ReductionAxes &reduction_axes, - const Reducer &reducer) - { - out.device(d) = in.reduce(reduction_axes, reducer); - } -}; - -template struct SetZeroFunctor -{ - // Computes on device "d": out = out.setZero(), - void operator()(const Device &d, typename TTypes::Flat out) - { - out.device(d) = out.constant(T(0)); - } -}; - -} // namespace functor - -using ShapeVec = std::vector; -using Labels = std::vector; -using OperandLabels = std::vector; -using LabelCounts = std::vector; -using OperandLabelCounts = std::vector; -using LabelToDimSizes = std::vector; - -// Each dimension is categorized into exactly one of five types based on -// 
whether its corresponding label is present in the input and/or the output -// subscripts. -enum DimensionType -{ - // Batch dimensions are those present in two inputs as well as the output. - // They are part of the batch dimensions during Tensor contraction. - // Such dimensions may be broadcasting dimensions (those mapping to - // ellipsis) - // or explicit batch dimensions corresponding to named axis labels. - kBroadcasting = 0, - kBatch = 1, - // Free dimensions are present in exactly one of the inputs, and also the - // output. These are non-contracted axes in the Tensor contraction. - kFree = 2, - // Contract dimensions are present in two inputs, but not the output. These - // dimensions are contracted in Tensor contraction. - kContract = 3, - // Reduce dimensions are present in exactly one input; and not in the output - // and are summed over prior to Tensor contraction. - kReduce = 4, -}; - -namespace -{ - -constexpr int kEllipsisLabel = -1; - -std::vector strSplit(std::string_view text, std::string_view delimiter) -{ - std::vector result; - - size_t start = 0; - size_t pos = 0; - - do - { - pos = text.find(delimiter, start); - if (pos == std::string::npos) - { - result.push_back(std::string(text.substr(start, text.size() - start))); - break; - } - - result.push_back(std::string(text.substr(start, pos - start))); - start = pos + delimiter.size(); - } while (pos != std::string::npos); - - return result; -} - -inline DimensionType getDimensionType(bool is_removed, bool is_unique) -{ - if (!is_removed && !is_unique) - return kBatch; - else if (!is_removed && is_unique) - return kFree; - else if (is_removed && !is_unique) - return kContract; - else // is_removed && is_unique - return kReduce; -} - -inline Shape copyShape(const Shape &shape) -{ - return Shape::ExtendedShape(shape.DimensionsCount(), shape); -} -} // namespace - -class Einsum -{ -public: - Einsum() : _prepared(false) - { - // DO NOTHING - } - - void prepare(std::string_view equation) - { - if 
(_prepared) - { - return; - } - - // Parse equation - parseEquation(equation); - _prepared = true; - } - - void operator()(std::string_view equation, const std::vector &input_shapes, - const std::vector &input_data, const Shape &output_shape, - float *output_data) - { - if (!_prepared) - { - prepare(equation); - } - - const int num_inputs = input_shapes.size(); - std::vector> inputs(num_inputs); - for (int i = 0; i < num_inputs; i++) - { - inputs[i].shape.ReplaceWith(input_shapes[i].DimensionsCount(), input_shapes[i].DimsData()); - inputs[i].buffer = input_data[i]; - } - - OperandLabels input_labels(_input_labels); - Labels output_labels(_output_labels); - std::vector label_types(_label_types); - OperandLabelCounts input_label_counts(_input_label_counts); - LabelCounts output_label_counts(_output_label_counts); - LabelToDimSizes label_to_dim_sizes; - - processDimensions(inputs, &input_labels, &output_labels, &label_types, &input_label_counts, - &output_label_counts, &label_to_dim_sizes); - - // The reduction phase (a) sums across reduction dimensions, (b) takes - // generalized diagonals, and (c) reshapes it into shape - // [(broadcasting) batch shape] + [F,C] - // where F and C denote the total (compacted) size of free and contract - // dimensions, respectively. - - OperandLabels free_labels(num_inputs); - std::vector inputs_reduced(num_inputs); - std::vector swap_free_and_contract(num_inputs); - for (int i = 0; i < num_inputs; ++i) - { - bool temp_swap_free_and_contract = false; - reduceOperand(inputs[i], label_types, input_label_counts[i], &input_labels[i], - &free_labels[i], &temp_swap_free_and_contract, &inputs_reduced[i]); - swap_free_and_contract[i] = temp_swap_free_and_contract; - } - - // After reduction, the inputs should be reshaped to Tensors suitable for - // contraction. If num_inputs is 1, the reduced input is simply forwarded to - // the output. 
- Tensor contraction_output_reshaped; - contractOperands(inputs_reduced, swap_free_and_contract, &contraction_output_reshaped); - - // Copy the batch labels from the contraction output. Recover the batch - // shape, which may have been broadcasted. - std::vector result_shape_dims(contraction_output_reshaped.shape.DimensionsCount() - 2); - - for (size_t i = 0; i < result_shape_dims.size(); i++) - { - result_shape_dims[i] = contraction_output_reshaped.shape.Dims(i); - } - - int num_labels = label_types.size(); - Labels result_labels; - // All batch dimensions should be present in the contracted result. First - // the broadcasting dimensions, then the named batch dimensions. - for (int label = 0; label < num_labels; ++label) - { - if (label_types[label] == kBroadcasting) - result_labels.push_back(label); - } - for (int label = 0; label < num_labels; ++label) - { - if (label_types[label] == kBatch) - result_labels.push_back(label); - } - for (int i = 0; i < num_inputs; ++i) - { - for (auto &&label : free_labels[i]) - { - result_labels.push_back(label); - result_shape_dims.push_back(label_to_dim_sizes[label]); - } - } - - Shape result_shape(result_shape_dims.size(), result_shape_dims.data()); - - // Reshape the contraction (or reduction) result to its expanded shape: - // [(broadcasted) batch shape] + [free shape 0] + [free shape 1]. - Tensor contraction_output; - copyFrom(contraction_output_reshaped, result_shape, &contraction_output); - - // Inflate the output if necessary. (E.g. for the equation 'i->iii' which - // may arise while computing gradient of a regular Einsum). - // TODO(anudhyan): It's possible that Eigen's contract and inflate can be - // chained here to avoid materializing an intermediate. 
- Tensor output_inflated; - strideOrInflate(contraction_output, result_labels, output_label_counts, - true /* should_inflate */, &output_inflated); - - if (output_inflated.shape.DimensionsCount() > contraction_output.shape.DimensionsCount()) - { - // We inflated the output. Modify result labels accordingly. - Labels inflated_labels; - for (auto &&label : result_labels) - { - inflated_labels.insert(inflated_labels.end(), output_label_counts[label], label); - } - result_labels.swap(inflated_labels); - } - - // Find the permutation to map the result labels to the output labels. Note - // that both the result and the final output may have the repeated labels, - // in which case the permutation preserves the left-to-right ordering. - // E.g. if result labels are [0, 0, 1] and output is [0, l, 0] then the - // permutation should be [0, 2, 1]. We also use the fact that repeated - // labels in the result are adjacent to each other. - std::vector output_permutation(output_labels.size()); - std::vector label_to_position(num_labels, -1); - for (size_t i = 0; i < result_labels.size(); ++i) - { - // Remember the position of only the leftmost result label. - if (label_to_position[result_labels[i]] == -1) - { - label_to_position[result_labels[i]] = i; - } - } - for (size_t i = 0; i < output_labels.size(); ++i) - { - output_permutation[i] = label_to_position[output_labels[i]]; - // We have found the leftmost occurrence. The next one would be adjacent. 
- label_to_position[output_labels[i]] += 1; - } - - InputTensor temp_inflated; - temp_inflated.shape.ReplaceWith(output_inflated.shape.DimensionsCount(), - output_inflated.shape.DimsData()); - temp_inflated.buffer = (reinterpret_cast(output_inflated.buffer)); - ; - - Tensor output; - transposeOperand(temp_inflated, output_permutation, &output); - - memcpy(output_data, output.buffer, output_shape.FlatSize() * sizeof(float)); - - temp_operand.clear(); - } - -private: - void parseEquation(std::string_view equation) - { - std::vector input_str; - std::string output_str; - - parseEinsumEquation(equation, input_str, output_str); - - // Temporary map from single character labels to (consecutive) integer - // labels. - std::map label_mapping; - int num_inputs = input_str.size(); - _input_labels.resize(num_inputs); - - // Map from single characters to integer labels. - for (int i = 0; i < num_inputs; ++i) - { - mapToLabels(input_str[i], _input_labels.at(i), label_mapping); - } - mapToLabels(output_str, _output_labels, label_mapping); - - // Compute counts for input and output labels. - int num_labels = label_mapping.size(); - _input_label_counts.resize(num_inputs); - _input_has_ellipsis.resize(num_inputs); - for (int i = 0; i < num_inputs; ++i) - { - _input_label_counts.at(i).resize(num_labels); - for (const int label : _input_labels.at(i)) - { - if (label != kEllipsisLabel) - _input_label_counts.at(i)[label] += 1; - else - _input_has_ellipsis.at(i) = true; - } - } - _output_label_counts.resize(num_labels); - for (const int label : _output_labels) - { - if (label != kEllipsisLabel) - _output_label_counts.at(label) += 1; - else - _output_has_ellipsis = true; - } - - // Map each label to a unique DimensionType. 
- _label_types.resize(num_labels); - for (int label = 0; label < num_labels; ++label) - { - bool removed = (_output_label_counts[label] == 0); - bool unique = - num_inputs == 1 || _input_label_counts[0][label] == 0 || _input_label_counts[1][label] == 0; - _label_types[label] = getDimensionType(removed, unique); - } - } - - void parseEinsumEquation(std::string_view &equation, std::vector &input_subscripts, - std::string &output_subscript) - { - std::vector inputs_and_output_subscripts = strSplit(equation, "->"); - if (inputs_and_output_subscripts.size() != 2) - { - throw std::runtime_error{"Einsum: Expecting exactly one '->' in einsum equation: " + - std::string(equation)}; - } - - output_subscript = inputs_and_output_subscripts[1]; - input_subscripts = strSplit(inputs_and_output_subscripts[0], ","); - if (input_subscripts.size() != 1 && input_subscripts.size() != 2) - { - throw std::runtime_error{"Einsum: Expecting 1 or 2 input subscripts in equation '" + - std::string(equation) + - "' but got: " + std::to_string(input_subscripts.size())}; - } - } - - // Maps the character labels to consecutive integers. - void mapToLabels(std::string_view subscript, Labels &labels, std::map &label_mapping) - { - for (size_t i = 0; i < subscript.size(); ++i) - { - const char label_char = subscript[i]; - if (label_char == '.') - { - labels.push_back(kEllipsisLabel); - i += 2; // Skip next 2 characters as well. 
- continue; - } - if (label_mapping.find(label_char) == label_mapping.end()) - { - const int next_label = label_mapping.size(); - label_mapping[label_char] = next_label; - } - const int mapped_label = label_mapping[label_char]; - labels.push_back(mapped_label); - } - } - - template - void processDimensions(const std::vector> &inputs, OperandLabels *input_labels, - Labels *output_labels, std::vector *label_types, - OperandLabelCounts *input_label_counts, LabelCounts *output_label_counts, - LabelToDimSizes *label_to_dim_sizes) - { - if (inputs.size() != input_labels->size()) - { - throw std::runtime_error{"Expected " + std::to_string(input_labels->size()) + - " inputs but got: " + std::to_string(inputs.size())}; - } - const int num_inputs = inputs.size(); - - // We infer the number of broadcasting dimensions by taking the maximum rank - // among the broadcasting subshapes of the input. - int max_bcast_dims = 0; - const int num_named_labels = label_types->size(); - label_to_dim_sizes->resize(num_named_labels); - for (int i = 0; i < num_inputs; ++i) - { - Labels *labels = &(*input_labels)[i]; - - if (!_input_has_ellipsis[i]) - { - if (inputs[i].shape.DimensionsCount() != ((int32_t)labels->size())) - { - throw std::runtime_error{"Expected input " + std::to_string(i) + " to have rank " + - std::to_string(labels->size()) + " but got: " + - std::to_string(inputs[i].shape.DimensionsCount())}; - } - for (size_t label_idx = 0; label_idx < labels->size(); ++label_idx) - { - const int label = (*labels)[label_idx]; - recordLabelToDimension(label, label_idx, inputs[i].shape, label_to_dim_sizes); - } - continue; - } - - // Input has an ellipsis. 
- if (inputs[i].shape.DimensionsCount() + 1 < (int32_t)labels->size()) - { - throw std::runtime_error{"Expected input " + std::to_string(i) + " to have rank at least " + - std::to_string(labels->size() - 1) + - " but got: " + std::to_string(inputs[i].shape.DimensionsCount())}; - } - int ellipsis_axis = -1; - const int num_bcast_dims = inputs[i].shape.DimensionsCount() - labels->size() + 1; - for (size_t label_idx = 0; label_idx < labels->size(); ++label_idx) - { - const int label = (*labels)[label_idx]; - if (label == kEllipsisLabel) - { - ellipsis_axis = label_idx; - continue; - } - // Current label is not an ellipsis. - const int axis = label_idx + (ellipsis_axis == -1 ? 0 : num_bcast_dims - 1); - recordLabelToDimension(label, axis, inputs[i].shape, label_to_dim_sizes); - } - // Found an ellipsis. Replace 'kEllipsisLabel' with broadcasting - // dimensions. - if (ellipsis_axis != -1) - { - insertBroadcastLabels(num_bcast_dims, num_named_labels, ellipsis_axis, labels, - &input_label_counts->at(i)); - max_bcast_dims = std::max(max_bcast_dims, num_bcast_dims); - } - } - - std::vector::iterator it_input = - std::find(_input_has_ellipsis.begin(), _input_has_ellipsis.end(), true); - if (it_input == _input_has_ellipsis.end() && !_output_has_ellipsis) - { - return; - } - // Insert broadcasting dimensions in the output labels. - auto it = std::find(output_labels->begin(), output_labels->end(), kEllipsisLabel); - if (it != output_labels->end()) - { - const int ellipsis_axis = it - output_labels->begin(); - insertBroadcastLabels(max_bcast_dims, num_named_labels, ellipsis_axis, output_labels, - output_label_counts); - } - else if (max_bcast_dims > 0) - { - std::runtime_error{"Output contains " + std::to_string(max_bcast_dims) + - " broadcasting dimension(s) but no ellipsis " + - "(...) was found in the output subscripts."}; - } - // Populate DimensionType for the new broadcasting labels. 
- label_types->resize(num_named_labels + max_bcast_dims, kBroadcasting); - } - - void recordLabelToDimension(const int32_t label, const int axis, const Shape &input_shape, - LabelToDimSizes *label_to_dim_sizes) - { - const int32_t input_dim = input_shape.Dims(axis); - // We know that label_to_dim_sizes has the size to accommodate named labels. - if (label_to_dim_sizes->at(label) != 0 && label_to_dim_sizes->at(label) != input_dim) - { - std::runtime_error{"Expected dimension " + std::to_string(label_to_dim_sizes->at(label)) + - " at axis " + std::to_string(axis) + - " of the input shaped but got dimension " + std::to_string(input_dim)}; - } - (*label_to_dim_sizes)[label] = input_dim; - } - - void insertBroadcastLabels(int num_bcast_dims, int num_named_labels, int ellipsis_axis, - Labels *labels, LabelCounts *label_counts) - { - labels->erase(labels->begin() + ellipsis_axis); - labels->insert(labels->begin() + ellipsis_axis, num_bcast_dims, 0); - std::iota(labels->begin() + ellipsis_axis, labels->begin() + ellipsis_axis + num_bcast_dims, - num_named_labels); - // Increment label counts. Since these are new labels, the count is set - // to 1. - label_counts->resize(num_named_labels + num_bcast_dims, 1); - } - - template - void reduceOperand(const InputTensor &input, const std::vector &label_types, - const LabelCounts &label_counts, Labels *labels, Labels *free_labels, - bool *swap_free_and_contract, Tensor *output) - { - // Find the permutation to transpose the input dimensions in the order of - // DimensionType; i.e. batch, free, contract and reduce dimensions. This - // makes it more convenient to invoke Reduce/Contract operations. - std::vector permutation(input.shape.DimensionsCount()); - std::iota(permutation.begin(), permutation.end(), 0); - Tensor input_transposed; - - // Check if we can avoid the transpose. We need to flip the adj_x (or adj_y) - // flag during BatchMatMul. This is an extra optimization not necessary for - // correctness. 
- if (shouldSwapFreeAndContract(*labels, label_types)) - { - *swap_free_and_contract = true; - } - else - { - std::sort(permutation.begin(), permutation.end(), [&](int i, int j) { - int label_i = (*labels)[i]; - int label_j = (*labels)[j]; - return std::tie(label_types[label_i], label_i) < std::tie(label_types[label_j], label_j); - }); - } - // Transpose the input so that DimensionTypes are in order. - transposeOperand(input, permutation, &input_transposed); - - permuteLabels(permutation, labels); - - // Take the generalized diagonal for dimensions with repeated axis labels. - Tensor input_deduped; - labels->erase(std::unique(labels->begin(), labels->end()), labels->end()); - strideOrInflate(input_transposed, *labels, label_counts, false /* should_inflate */, - &input_deduped); - - // Reshape denotes the rank-5 shape [broadcast, batch, free, contract, - // reduce] where we've compacted the dimensions of each DimensionType. - std::vector reshape(5, 1); - - // The output shape is [batch shape] + [free size, contract size] - // That is, the batch shape is preserved (for broadcasting while - // contracting) while the free dims and contract dims are compressed to one - // dimension each. - Shape output_shape; - std::vector output_shape_dims; - for (size_t label_idx = 0; label_idx < labels->size(); ++label_idx) - { - const int label = labels->at(label_idx); - int32_t dim = input_deduped.shape.Dims(label_idx); - if (label_types[label] == kBroadcasting || label_types[label] == kBatch) - { - output_shape_dims.push_back(dim); - } - else if (label_types[label] == kFree) - { - free_labels->push_back(label); - } - reshape[label_types[label]] *= dim; - } - - if (*swap_free_and_contract) - std::swap(reshape[kFree], reshape[kContract]); - - output_shape_dims.push_back(reshape[kFree]); - output_shape_dims.push_back(reshape[kContract]); - - output_shape.ReplaceWith(output_shape_dims.size(), output_shape_dims.data()); - - if (reshape[kReduce] == 1) - { // No need to actually reduce. 
- return copyFrom(input_deduped, output_shape, output); - } - - allocateTemp(output_shape, output); - - using Reducer = Eigen::internal::SumReducer; - using Index = typename TTypes::Tensor::Index; - - const Eigen::ThreadPoolDevice &device = *eigen_support::GetThreadPoolDevice(); - - // Reduce along the last axis (i.e axis 1) of the rank-2 Tensor. - const int32_t output_size = - reshape[kBroadcasting] * reshape[kBatch] * reshape[kFree] * reshape[kContract]; - functor::ReduceFunctor::Reduce( - device, output->shaped({output_size}), - input_deduped.shaped({output_size, reshape[kReduce]}), Eigen::array({1}), - Reducer()); - } - - bool shouldSwapFreeAndContract(const Labels &labels, - const std::vector &label_types) - { - // Check that ordering is according to dimension type, with the role of - // free and contract dimensions swapped. - std::vector remap = {0, 1, 3, 2, 4}; - for (size_t i = 0; i + 1 < labels.size(); ++i) - { - const int dimtype_a = remap[label_types[labels[i]]]; - const int dimtype_b = remap[label_types[labels[i + 1]]]; - if (dimtype_a > dimtype_b || (dimtype_a == dimtype_b && labels[i] > labels[i + 1])) - { - return false; - } - } - return true; - } - - template - void transposeOperand(const InputTensor &input, const std::vector &permutation, - Tensor *output) - { - if (!shouldTranspose(input.shape, permutation)) - { - copyFrom(input, input.shape, output); - return; - } - Shape transposed_shape(input.shape.DimensionsCount()); - for (int i = 0; i < input.shape.DimensionsCount(); ++i) - { - transposed_shape.SetDim(i, input.shape.Dims(permutation[i])); - } - // For empty Tensors, just change the shape. E.g. we may need to transpose - // from shape [1, 0, 5] to [5, 1, 0]. 
- if (input.shape.FlatSize() == 0) - { - copyFrom(input, transposed_shape, output); - return; - } - - temp_operand.emplace_back(std::make_unique(transposed_shape.FlatSize())); - T *new_buffer = temp_operand.back().get(); - - TransposeParams transpose_params; - transpose_params.perm_count = permutation.size(); - for (size_t i = 0; i < permutation.size(); i++) - { - transpose_params.perm[i] = permutation[i]; - } - - Transpose(transpose_params, input.shape, input.buffer, transposed_shape, new_buffer); - - output->shape.ReplaceWith(transposed_shape.DimensionsCount(), transposed_shape.DimsData()); - output->buffer = new_buffer; - } - - bool shouldTranspose(const Shape &input_shape, const std::vector &permutation) - { - if (input_shape.DimensionsCount() < 2) - return false; - for (size_t i = 0; i < permutation.size(); ++i) - { - if (permutation[i] != (int32_t)i) - return true; - } - return false; - } - - template - void copyFrom(const InputTensor &input, const Shape &shape, Tensor *output) - { - Tensor temp_tensor; - temp_tensor.shape.ReplaceWith(input.shape.DimensionsCount(), input.shape.DimsData()); - temp_operand.emplace_back(std::make_unique(input.shape.FlatSize())); - temp_tensor.buffer = temp_operand.back().get(); - memcpy(temp_tensor.buffer, input.buffer, input.shape.FlatSize() * sizeof(float)); - - copyFrom(temp_tensor, shape, output); - } - - void copyFrom(const Tensor &input, const Shape &shape, Tensor *output) - { - if (output->copyFrom(input, shape)) - return; - - throw std::runtime_error{"Einsum: Encountered error while reshaping a Tensor"}; - } - - // Permutes the labels according to the given permutation. 
- void permuteLabels(const std::vector &permutation, Labels *labels) - { - Labels permuted_labels(labels->size()); - for (size_t i = 0; i < labels->size(); ++i) - { - permuted_labels[i] = (*labels)[permutation[i]]; - } - labels->swap(permuted_labels); - } - - // If there are repeated labels in either the input or output, then this - // strides the input (e.g. iii->i) or inflates it (e.g. i->iii), respectively. - template - void strideOrInflate(const Tensor &input, const Labels &labels, const LabelCounts &label_counts, - const bool should_inflate, Tensor *output) - { - // Return early if there are no repeated indices. - if (std::all_of(label_counts.begin(), label_counts.end(), [](int c) { return c <= 1; })) - { - return copyFrom(input, input.shape, output); - } - // We reshape so that each repeated label is compressed to one dimension. - // E.g. For iiij -> ij, The shape [3, 3, 3, 5] would be compressed to [27, - // 5]. Striding appropriately (in this case with strides 14 (=1+3+9) and 1) - // recovers the generalized diagonal of shape [3, 5]. - std::vector reshape; - std::vector strides; - // Strided and inflated shapes correspond to input and output shapes, - // respectively, should_inflate is true (vice-versa if should_inflate is - // false). E.g. they are [3, 5] and [3, 3, 3, 5] in the above example. - Shape strided_shape; - Shape inflated_shape; - std::vector strided_shape_dims; - std::vector inflated_shape_dims; - for (auto &&label : labels) - { - const int32_t count = label_counts[label]; - const int current_axis = - should_inflate ? strided_shape_dims.size() : inflated_shape_dims.size(); - const int32_t dim = input.shape.Dims(current_axis); - strided_shape_dims.push_back(dim); - inflated_shape_dims.insert(inflated_shape_dims.end(), count, dim); - const int32_t reshape_dim = std::pow(dim, count); - reshape.push_back(reshape_dim); - // While taking the d-diagonal in a rank k Tensor, we take d - // equally-spaced elements including the first and last element. 
Then, (k - // - 1) * stride = d^k - 1, or, stride = (d^k - 1)/(d - 1). - const int32_t stride = (dim > 1 && count > 1) ? (reshape_dim - 1) / (dim - 1) : 1; - strides.push_back(stride); - } - - strided_shape.ReplaceWith(strided_shape_dims.size(), strided_shape_dims.data()); - inflated_shape.ReplaceWith(inflated_shape_dims.size(), inflated_shape_dims.data()); - - Shape output_shape = Shape(should_inflate ? inflated_shape : strided_shape); - - output->shape.ReplaceWith(output_shape.DimensionsCount(), output_shape.DimsData()); - temp_operand.emplace_back(std::make_unique(output_shape.FlatSize())); - output->buffer = temp_operand.back().get(); - - const Eigen::ThreadPoolDevice &device = *eigen_support::GetThreadPoolDevice(); - - switch (reshape.size()) - { -#define NDIMS_CASE(N) \ - case N: \ - { \ - if (should_inflate) \ - { \ - auto output_map = output->shaped(reshape); \ - auto input_map = input.shaped(strided_shape_dims); \ - functor::InflateFunctor()(device, input_map, strides, \ - output_map); \ - } \ - else \ - { \ - auto input_map = input.shaped(reshape); \ - auto output_map = output->shaped(strided_shape_dims); \ - functor::StrideFunctor()(device, input_map, strides, \ - output_map); \ - } \ - } \ - break; - NDIMS_CASE(1); - NDIMS_CASE(2); - NDIMS_CASE(3); - NDIMS_CASE(4); - NDIMS_CASE(5); - NDIMS_CASE(6); - default: - throw std::runtime_error{"Unsupported rank: " + std::to_string(reshape.size()) + - " while handling repeated indices. Up to rank 6 is supported."}; -#undef NDIMS_CASE - } - } - - void allocateTemp(const Shape &shape, Tensor *output) - { - output->shape.ReplaceWith(shape.DimensionsCount(), shape.DimsData()); - temp_operand.emplace_back(std::make_unique(shape.FlatSize())); - output->buffer = temp_operand.back().get(); - } - - // Contracts the inputs along the last axis. (or the second last if the - // corresponding value of swap_free_and_contract is true). The batch - // dimensions are broadcast to the output shape. 
- // TODO(anudhyan): Factor this function into a BatchMatMul functor and support - // transpose_x and transpose_y attributes (in addition to adj_x and adj_y). - // Also, the BatchMatMul might devolve into a component-wise multiplication - // when the matrix shape is [1,1]; in this case BatchMatMul functor would be - // very inefficient. The functor should detect if this is the case and perform - // componentwise multiplication functor instead. - void contractOperands(std::vector &inputs, std::vector &swap_free_and_contract, - Tensor *output) - { - if (inputs.size() == 1) - return copyFrom(inputs[0], inputs[0].shape, output); - - MatMulBCast bcast(inputs[0].shape, inputs[1].shape); - if (!bcast.IsValid()) - { - throw std::runtime_error{"Einsum: Invalid broadcasting dimensions"}; - } - - Tensor lhs; - reshapeToRank3(inputs[0], bcast.x_batch_size(), &lhs); - Tensor rhs; - reshapeToRank3(inputs[1], bcast.y_batch_size(), &rhs); - Shape old_output_shape = bcast.output_batch_shape(); - Shape output_shape(static_cast(old_output_shape.DimensionsCount() + inputs.size())); - for (int i = 0; i < old_output_shape.DimensionsCount(); i++) - { - output_shape.SetDim(i, old_output_shape.Dims(i)); - } - - for (size_t i = 0; i < inputs.size(); ++i) - { - const int32_t free_axis = - inputs[i].shape.DimensionsCount() - (swap_free_and_contract[i] ? 
1 : 2); - output_shape.SetDim(i + old_output_shape.DimensionsCount(), inputs[i].shape.Dims(free_axis)); - } - bool adj_x = swap_free_and_contract[0]; - bool adj_y = !swap_free_and_contract[1]; - - allocateTemp(output_shape, output); - - const Eigen::ThreadPoolDevice &device = *eigen_support::GetThreadPoolDevice(); - - if (lhs.shape.FlatSize() == 0 || rhs.shape.FlatSize() == 0) - { - functor::SetZeroFunctor set_zero; - set_zero(device, - typename TTypes::Tensor(output->base(), output->shape.FlatSize())); - return; - } - - Tensor output_reshaped; - reshapeToRank3(*output, bcast.output_batch_size(), &output_reshaped); - - // LaunchBatchMatMul::Launch(lhs, rhs, adj_x, adj_y, bcast, &output_reshaped); - BatchMatMul batchMatMul; - // Set rhs is not constant: don't use optimization - batchMatMul.prepare(lhs.shape, rhs.shape, adj_x, adj_y, false); - batchMatMul(lhs.shape, lhs.base(), rhs.shape, rhs.base(), adj_x, adj_y, - output_reshaped.shape, output_reshaped.base()); - } - - void reshapeToRank3(const Tensor &input, int batch_size, Tensor *output) - { - const int rank = input.shape.DimensionsCount(); - Shape output_shape({batch_size, input.shape.Dims(rank - 2), input.shape.Dims(rank - 1)}); - copyFrom(input, output_shape, output); - } - -private: - bool _prepared; - - OperandLabels _input_labels; - Labels _output_labels; - std::vector _label_types; - OperandLabelCounts _input_label_counts; - LabelCounts _output_label_counts; - std::vector _input_has_ellipsis; - bool _output_has_ellipsis = false; - - std::vector> temp_operand; -}; - -} // namespace cker -} // namespace nnfw - -#endif // __NNFW_CKER_EINSUM_H__ diff --git a/runtime/compute/cker/include/cker/operation/FusedBatchNorm.h b/runtime/compute/cker/include/cker/operation/FusedBatchNorm.h index 88d48fc0114..74bda43f746 100644 --- a/runtime/compute/cker/include/cker/operation/FusedBatchNorm.h +++ b/runtime/compute/cker/include/cker/operation/FusedBatchNorm.h @@ -23,7 +23,6 @@ #include "cker/Utils.h" #include 
"cker/operation/Helper/Tensor.h" -#include "cker/operation/Helper/MatmulBCast.h" #include "Transpose.h" #include "BatchMatMul.h" @@ -146,7 +145,7 @@ class FusedBatchNorm if (output->copyFrom(input, shape)) return; - throw std::runtime_error{"Einsum: Encountered error while reshaping a Tensor"}; + throw std::runtime_error{"FusedBatchNorm: Encountered error while reshaping a Tensor"}; } private: diff --git a/runtime/compute/cker/include/cker/operation/Helper/BCast.h b/runtime/compute/cker/include/cker/operation/Helper/BCast.h index 211db98cee0..c7173757617 100644 --- a/runtime/compute/cker/include/cker/operation/Helper/BCast.h +++ b/runtime/compute/cker/include/cker/operation/Helper/BCast.h @@ -18,12 +18,6 @@ #ifndef __NNFW_CKER_HELPER_BCAST_H__ #define __NNFW_CKER_HELPER_BCAST_H__ -/** - * ToDo : This file will be moved into upper folder when integrate with other - * custom operations. - * And It should merged with EinsumHelper's BCast. - **/ - #include "cker/Shape.h" #include "cker/eigen/EigenSupport.h" diff --git a/runtime/compute/cker/include/cker/operation/Helper/MatmulBCast.h b/runtime/compute/cker/include/cker/operation/Helper/MatmulBCast.h deleted file mode 100644 index b7d63943304..00000000000 --- a/runtime/compute/cker/include/cker/operation/Helper/MatmulBCast.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __NNFW_CKER_EINSUM_HELPER_MATMUL_BCAST_H__ -#define __NNFW_CKER_EINSUM_HELPER_MATMUL_BCAST_H__ - -#include -#include -#include - -#include "BCast.h" -#include "cker/Shape.h" - -namespace nnfw -{ -namespace cker -{ - -// Simple wrapper over BCast specialized for MatMul. -// Provides utilities for broadcasting across batch dimensions for binary -// MatMul-like operations. - -// Fix: Use Shape directly instead of Vec -class MatMulBCast -{ -public: - MatMulBCast(Shape &shape_x, Shape &shape_y) - { - if (shape_x.DimensionsCount() < 2 || shape_y.DimensionsCount() < 2) - return; - - std::vector x; - std::vector y; - - x.resize(shape_x.DimensionsCount() - 2); - y.resize(shape_y.DimensionsCount() - 2); - - for (size_t i = 0; i < x.size(); i++) - { - x[i] = shape_x.Dims(i); - } - for (size_t i = 0; i < y.size(); i++) - { - y[i] = shape_y.Dims(i); - } - - _batch_bcast = std::make_unique(std::move(x), std::move(y)); - if (!_batch_bcast->IsValid()) - return; - - const auto &x_reshaped = _batch_bcast->x_reshape(); - const auto &y_reshaped = _batch_bcast->y_reshape(); - auto output_shape = _batch_bcast->output_shape(); - - _x_batch_size = std::accumulate(x_reshaped.cbegin(), x_reshaped.cend(), INT32_C(1), - std::multiplies()); - _y_batch_size = std::accumulate(y_reshaped.cbegin(), y_reshaped.cend(), INT32_C(1), - std::multiplies()); - _output_shape.ReplaceWith(output_shape.size(), output_shape.data()); - _output_batch_size = _output_shape.FlatSize(); - } - - bool IsValid() const { return (_batch_bcast != nullptr) && _batch_bcast->IsValid(); } - int32_t x_batch_size() const { return _x_batch_size; } - int32_t y_batch_size() const { return _y_batch_size; } - int32_t output_batch_size() const { return _output_batch_size; } - const Shape &output_batch_shape() const { return _output_shape; } - -private: - std::unique_ptr _batch_bcast; - - int32_t _x_batch_size; - 
int32_t _y_batch_size; - Shape _output_shape; - int32_t _output_batch_size; -}; - -} // namespace cker -} // namespace nnfw - -#endif // __NNFW_CKER_EINSUM_HELPER_MATMUL_BCAST_H__ diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc index a4a8459c129..7030c759a2a 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.cc +++ b/runtime/onert/backend/cpu/KernelGenerator.cc @@ -25,7 +25,6 @@ #include "ops/ConvolutionLayer.h" #include "ops/DepthToSpaceLayer.h" #include "ops/DepthwiseConvolutionLayer.h" -#include "ops/EinsumLayer.h" #include "ops/ElementwiseActivationLayer.h" #include "ops/ElementwiseBinaryLayer.h" #include "ops/ElementwiseUnaryLayer.h" @@ -602,24 +601,6 @@ void KernelGenerator::visit(const ir::operation::OneHot &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Einsum &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - - auto output_tensor = _tensor_reg->getPortableTensor(ofm_index); - std::vector input_tensors; - for (const auto &ifm_idx : node.getInputs()) - input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx)); - - const auto &equation = node.param().equation; - - auto fn = std::make_unique(); - - fn->configure(input_tensors, equation, output_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::Custom &node) { auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq, diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h index b8ed5cb0a4a..5ead87e61f6 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.h +++ b/runtime/onert/backend/cpu/KernelGenerator.h @@ -52,7 +52,6 @@ class KernelGenerator : public basic::KernelGeneratorBase void visit(const ir::operation::Custom &node) override; void visit(const ir::operation::DepthToSpace &) override; void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::Einsum &) 
override; void visit(const ir::operation::ElementwiseActivation &) override; void visit(const ir::operation::ElementwiseBinary &) override; void visit(const ir::operation::ElementwiseUnary &) override; diff --git a/runtime/onert/backend/cpu/ops/EinsumLayer.cc b/runtime/onert/backend/cpu/ops/EinsumLayer.cc deleted file mode 100644 index 14c4ccca650..00000000000 --- a/runtime/onert/backend/cpu/ops/EinsumLayer.cc +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "EinsumLayer.h" - -#include - -namespace onert::backend::cpu::ops -{ - -EinsumLayer::EinsumLayer() - : _inputs(), _output(nullptr), _equation(), _einsum_kernel(new nnfw::cker::Einsum()) -{ - // DO NOTHING -} - -EinsumLayer::~EinsumLayer() = default; - -void EinsumLayer::einsumFloat32() -{ - uint32_t num_inputs = _inputs.size(); - nnfw::cker::Einsum &kernel = *_einsum_kernel; - - kernel.prepare(_equation); - - std::vector inputShapes; - std::vector inputFloatPtrs; - - for (uint32_t i = 0; i < num_inputs; i++) - { - inputShapes.emplace_back(getShape(_inputs[i])); - inputFloatPtrs.emplace_back(getBuffer(_inputs[i])); - } - - kernel(_equation, inputShapes, inputFloatPtrs, getShape(_output), getBuffer(_output)); -} - -void EinsumLayer::run() -{ - if (_output->data_type() == OperandType::FLOAT32) - { - einsumFloat32(); - } - else - { - throw std::runtime_error{"Einsum: unsupported data type"}; - } -} - -void EinsumLayer::configure(const std::vector &inputs, - std::string equation, IPortableTensor *output) -{ - assert(inputs.size() > 0); - assert(output != nullptr); - - _inputs = inputs; - _equation = equation; - _output = output; -} - -} // namespace onert::backend::cpu::ops diff --git a/runtime/onert/backend/cpu/ops/EinsumLayer.h b/runtime/onert/backend/cpu/ops/EinsumLayer.h deleted file mode 100644 index e05aad4661a..00000000000 --- a/runtime/onert/backend/cpu/ops/EinsumLayer.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_EINSUM_LAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_EINSUM_LAYER_H__ - -#include -#include "OperationUtils.h" - -#include -#include -#include - -namespace nnfw::cker -{ -class Einsum; -} // namespace nnfw::cker - -namespace onert::backend::cpu::ops -{ - -class EinsumLayer : public ::onert::exec::IFunction -{ -public: - EinsumLayer(); - ~EinsumLayer(); - -public: - void einsumFloat32(); - - void configure(const std::vector &inputs, std::string equation, - IPortableTensor *output); - - void run() override; - -private: - std::vector _inputs; - IPortableTensor *_output; - - std::string _equation; - - std::unique_ptr _einsum_kernel; -}; - -} // namespace onert::backend::cpu::ops - -#endif // __ONERT_BACKEND_CPU_OPS_EINSUM_LAYER_H__ diff --git a/runtime/onert/core/include/ir/Operations.Include.h b/runtime/onert/core/include/ir/Operations.Include.h index 303ee9ae002..82024217027 100644 --- a/runtime/onert/core/include/ir/Operations.Include.h +++ b/runtime/onert/core/include/ir/Operations.Include.h @@ -33,7 +33,6 @@ #include "ir/operation/Custom.h" #include "ir/operation/DepthToSpace.h" #include "ir/operation/DepthwiseConv2D.h" -#include "ir/operation/Einsum.h" #include "ir/operation/ElementwiseActivation.h" #include "ir/operation/ElementwiseBinary.h" #include "ir/operation/ElementwiseUnary.h" diff --git a/runtime/onert/core/include/ir/Operations.lst b/runtime/onert/core/include/ir/Operations.lst index c37de7a8d46..0b87d2c76b9 100644 --- a/runtime/onert/core/include/ir/Operations.lst +++ b/runtime/onert/core/include/ir/Operations.lst @@ -36,7 +36,6 @@ OP(ConvertFp32ToFp16) OP(Custom) OP(DepthToSpace) OP(DepthwiseConv2D) -OP(Einsum) OP(ElementwiseActivation) OP(ElementwiseBinary) OP(ElementwiseUnary) diff --git a/runtime/onert/core/include/ir/operation/Einsum.h b/runtime/onert/core/include/ir/operation/Einsum.h deleted file mode 
100644 index a1faa7a1c15..00000000000 --- a/runtime/onert/core/include/ir/operation/Einsum.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_EINSUM_H__ -#define __ONERT_IR_OPERATION_EINSUM_H__ - -#include "ir/Operation.h" -#include "ir/InternalType.h" - -namespace onert::ir::operation -{ - -class Einsum : public Operation -{ -public: - struct Param - { - std::string equation; - }; - -public: - Einsum(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param ¶m); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Einsum; } - -public: - const Param ¶m() const { return _param; } - -private: - Param _param; -}; - -} // namespace onert::ir::operation - -#endif // __ONERT_IR_OPERATION_EINSUM_H__ diff --git a/runtime/onert/core/src/ir/operation/Einsum.cc b/runtime/onert/core/src/ir/operation/Einsum.cc deleted file mode 100644 index 50f6a8d1869..00000000000 --- a/runtime/onert/core/src/ir/operation/Einsum.cc +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/Einsum.h" -#include "ir/OperationVisitor.h" - -namespace onert::ir::operation -{ - -void Einsum::accept(OperationVisitor &v) const { v.visit(*this); } - -Einsum::Einsum(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param ¶m) - : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param} -{ -} - -} // namespace onert::ir::operation diff --git a/runtime/onert/core/src/ir/train/operation/UntrainableOperation.test.cc b/runtime/onert/core/src/ir/train/operation/UntrainableOperation.test.cc index 4ae72668dd9..a0eb5d4afdb 100644 --- a/runtime/onert/core/src/ir/train/operation/UntrainableOperation.test.cc +++ b/runtime/onert/core/src/ir/train/operation/UntrainableOperation.test.cc @@ -164,14 +164,6 @@ operation::DetectionPostProcess generateDetectionPostProcess() param}; } -operation::Einsum generateEinsum() -{ - operation::Einsum::Param param; - param.equation = ""; - - return operation::Einsum{OperandIndexSequence{1}, OperandIndexSequence{0}, param}; -} - operation::ElementwiseActivation generateElementwiseActivation() { operation::ElementwiseActivation::Param param; @@ -662,9 +654,6 @@ TEST(UntrainableOperation, testAllOps) const auto detection = generateDetectionPostProcess(); verifyOp(detection); - const auto einsum = generateEinsum(); - verifyOp(einsum); - const auto activation = generateElementwiseActivation(); verifyOp(activation); @@ -942,12 +931,6 @@ TEST(UntrainableOperation, neg_TrainableOperationVisitor) EXPECT_ANY_THROW(visitor.invoke(*untrainable)); } - { 
- const auto einsum = generateEinsum(); - auto untrainable = generateUntrainableOperation(einsum); - EXPECT_ANY_THROW(visitor.invoke(*untrainable)); - } - { const auto activation = generateElementwiseActivation(); auto untrainable = generateUntrainableOperation(activation); diff --git a/runtime/onert/core/src/loader/BaseLoader.h b/runtime/onert/core/src/loader/BaseLoader.h index 4f5083d33be..919f74866a6 100644 --- a/runtime/onert/core/src/loader/BaseLoader.h +++ b/runtime/onert/core/src/loader/BaseLoader.h @@ -138,7 +138,6 @@ template class BaseLoader void loadCustom(const Operator *op, ir::Graph &subg); void loadDepthToSpace(const Operator *op, ir::Graph &subg); void loadDepthwiseConv2D(const Operator *op, ir::Graph &subg); - void loadEinsum(const Operator *op, ir::Graph &subg); void loadElementwiseActivation(const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type, float alpha = 0.f, float beta = 0.f); @@ -1124,7 +1123,6 @@ void BaseLoader::loadCustom(const Operator *op, ir::Graph &subg) ReduceAll, MatrixBandPart, BatchMatMul, - Einsum, BroadcastTo, FusedBatchNorm, StatelessRandomUniform, @@ -1138,7 +1136,6 @@ void BaseLoader::loadCustom(const Operator *op, ir::Graph &subg) {"All", BuiltinOP::ReduceAll}, {"MatrixBandPart", BuiltinOP::MatrixBandPart}, {"BatchMatMulV2", BuiltinOP::BatchMatMul}, - {"Einsum", BuiltinOP::Einsum}, {"FusedBatchNormV3", BuiltinOP::FusedBatchNorm}, {"BroadcastTo", BuiltinOP::BroadcastTo}, {"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform}, @@ -1164,9 +1161,6 @@ void BaseLoader::loadCustom(const Operator *op, ir::Graph &subg) case BuiltinOP::BatchMatMul: loadBatchMatMul(op, subg); break; - case BuiltinOP::Einsum: - loadEinsum(op, subg); - break; case BuiltinOP::BroadcastTo: loadOperationTo(op, subg); break; @@ -1307,26 +1301,6 @@ void BaseLoader::loadComparison(const Operator *op, ir::Graph &sub loadOperationTo(op, subg, param); } -template -void BaseLoader::loadEinsum(const Operator *op, ir::Graph 
&subg) -{ - ir::operation::Einsum::Param param; - if (op->custom_options() == nullptr) - { - throw std::runtime_error{"Einsum: empty equation"}; - } - else - { - const auto attr_map = getCustomOpAttrMap(op); - param.equation = attr_map["equation"].ToString(); - } - - const auto es = loadOperationTo(op, subg, param); - if (es->getInputs().size() != 2) - { - throw std::runtime_error{"Einsum: NYI input - only support two inputs"}; - } -} template void BaseLoader::loadFusedBatchNorm(const Operator *op, ir::Graph &subg) { diff --git a/runtime/tests/nnapi/CMakeLists.txt b/runtime/tests/nnapi/CMakeLists.txt index cd6ee944cfc..afc98a69f0f 100644 --- a/runtime/tests/nnapi/CMakeLists.txt +++ b/runtime/tests/nnapi/CMakeLists.txt @@ -12,7 +12,7 @@ set(GENERATED_CPPS "${CMAKE_CURRENT_SOURCE_DIR}/src/generated/all_generated_V1_2 # Skip script for generate test and use pre-generated test if (GENERATE_RUNTIME_NNAPI_TESTS) set(GENERATOR_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/specs/generate_test.sh") - file(GLOB_RECURSE TEST_SPECS "${CMAKE_CURRENT_SOURCE_DIR}/specs/*.mod.py") + file(GLOB_RECURSE TEST_SPECS CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/specs/*.mod.py") add_custom_command(OUTPUT ${GENERATED_CPPS} COMMAND ${GENERATOR_SCRIPT} diff --git a/runtime/tests/nnapi/bridge/include/NeuralNetworksEx.h b/runtime/tests/nnapi/bridge/include/NeuralNetworksEx.h index f0387995df3..cf06b95ae60 100644 --- a/runtime/tests/nnapi/bridge/include/NeuralNetworksEx.h +++ b/runtime/tests/nnapi/bridge/include/NeuralNetworksEx.h @@ -506,21 +506,10 @@ typedef enum ANEURALNETWORKS_MATRIX_BAND_PART_EX = 50036, /** - * Tensor contraction over specified indices and outer product * - * Supported tensor {@link OperandCode}: - * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} - * - * Supported tensor rank: up to 4 - * - * Inputs: - * * 0 ~ n-1: The list of n input tensors. - * * 1: An 1-D tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}. Each element represent - * equation character. 
- * Always scalar is 1.0 and zeroPoint is 0 + * IMPORTANT NOTICE: + * ANEURALNETWORKS_EINSUM_EX operation is DEPRECATED - * - * Outputs: - * * 0: An output tensor. */ ANEURALNETWORKS_EINSUM_EX = 50037, diff --git a/runtime/tests/nnapi/bridge/wrapper/OperationFactory.cc b/runtime/tests/nnapi/bridge/wrapper/OperationFactory.cc index ba739f6180c..877df4759ce 100644 --- a/runtime/tests/nnapi/bridge/wrapper/OperationFactory.cc +++ b/runtime/tests/nnapi/bridge/wrapper/OperationFactory.cc @@ -1566,29 +1566,6 @@ OperationFactory::OperationFactory() return new operation::BatchMatMul{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_EINSUM_EX] = [](const OperationFactory::Param &init_param, - Operands &operands) { - // Each input should be interpreted as follows: - // - // 0....n - 1 -> n Input Tensors Index - // n -> equation - assert(init_param.input_count >= 1 && init_param.output_count == 1); - - OperandIndexSequence inputs; - for (uint32_t n = 0; n < init_param.input_count - 1; ++n) - { - inputs.append(OperandIndex{init_param.inputs[n]}); - } - OperandIndexSequence outputs{init_param.outputs[0]}; - - operation::Einsum::Param param; - const OperandIndex equation_index{init_param.inputs[init_param.input_count - 1]}; - std::vector equation_vector = operands.at(equation_index).asVector(); - param.equation = std::string(equation_vector.begin(), equation_vector.end()); - - return new operation::Einsum{inputs, outputs, param}; - }; - // 0 -> Input Tensor Index // 1 -> int32, int64, An 1-D int tensor Index _map[ANEURALNETWORKS_BROADCAST_TO_EX] = createSimpleBinaryOp; diff --git a/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_cl b/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_cl index 4e4d68871b9..5fa03711e44 100644 --- a/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_cl +++ b/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_cl @@ -59,11 +59,6 @@ GeneratedTests.dequantize_v1_2 GeneratedTests.dequantize_v1_2_zero_sized
GeneratedTests.dequantize_v1_2_zero_sized_float16 GeneratedTests.div_dynamic_nnfw -GeneratedTests.einsum_ex_float_matmul_2x2_2 -GeneratedTests.einsum_ex_float_matmul_3x2_3 -GeneratedTests.einsum_ex_float_matmul_3x3_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4_2 GeneratedTests.equal_dynamic_float_nnfw GeneratedTests.exp_ GeneratedTests.exp_dynamic_nnfw diff --git a/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_neon b/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_neon index 9e337bc7eba..042e8f71ced 100644 --- a/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_neon +++ b/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_neon @@ -58,11 +58,6 @@ GeneratedTests.dequantize_v1_2 GeneratedTests.dequantize_v1_2_zero_sized GeneratedTests.dequantize_v1_2_zero_sized_float16 GeneratedTests.div_dynamic_nnfw -GeneratedTests.einsum_ex_float_matmul_2x2_2 -GeneratedTests.einsum_ex_float_matmul_3x2_3 -GeneratedTests.einsum_ex_float_matmul_3x3_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4_2 GeneratedTests.equal_boolean GeneratedTests.equal_dynamic_float_nnfw GeneratedTests.exp_ diff --git a/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_cl b/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_cl index 4e4d68871b9..5fa03711e44 100644 --- a/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_cl +++ b/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_cl @@ -59,11 +59,6 @@ GeneratedTests.dequantize_v1_2 GeneratedTests.dequantize_v1_2_zero_sized GeneratedTests.dequantize_v1_2_zero_sized_float16 GeneratedTests.div_dynamic_nnfw -GeneratedTests.einsum_ex_float_matmul_2x2_2 -GeneratedTests.einsum_ex_float_matmul_3x2_3 -GeneratedTests.einsum_ex_float_matmul_3x3_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4_2 GeneratedTests.equal_dynamic_float_nnfw GeneratedTests.exp_ GeneratedTests.exp_dynamic_nnfw diff 
--git a/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_neon b/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_neon index 9e337bc7eba..042e8f71ced 100644 --- a/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_neon +++ b/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_neon @@ -58,11 +58,6 @@ GeneratedTests.dequantize_v1_2 GeneratedTests.dequantize_v1_2_zero_sized GeneratedTests.dequantize_v1_2_zero_sized_float16 GeneratedTests.div_dynamic_nnfw -GeneratedTests.einsum_ex_float_matmul_2x2_2 -GeneratedTests.einsum_ex_float_matmul_3x2_3 -GeneratedTests.einsum_ex_float_matmul_3x3_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4_2 GeneratedTests.equal_boolean GeneratedTests.equal_dynamic_float_nnfw GeneratedTests.exp_ diff --git a/runtime/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_cl b/runtime/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_cl index 854d6ac801a..46e6862b13f 100644 --- a/runtime/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_cl +++ b/runtime/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_cl @@ -59,11 +59,6 @@ GeneratedTests.dequantize_v1_2 GeneratedTests.dequantize_v1_2_zero_sized GeneratedTests.dequantize_v1_2_zero_sized_float16 GeneratedTests.div_dynamic_nnfw -GeneratedTests.einsum_ex_float_matmul_2x2_2 -GeneratedTests.einsum_ex_float_matmul_3x2_3 -GeneratedTests.einsum_ex_float_matmul_3x3_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4_2 GeneratedTests.equal_dynamic_float_nnfw GeneratedTests.exp_ GeneratedTests.exp_dynamic_nnfw diff --git a/runtime/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon b/runtime/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon index f636f551009..881a3b63ec9 100644 --- a/runtime/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon +++ b/runtime/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon @@ -58,11 +58,6 @@ GeneratedTests.dequantize_v1_2 GeneratedTests.dequantize_v1_2_zero_sized 
GeneratedTests.dequantize_v1_2_zero_sized_float16 GeneratedTests.div_dynamic_nnfw -GeneratedTests.einsum_ex_float_matmul_2x2_2 -GeneratedTests.einsum_ex_float_matmul_3x2_3 -GeneratedTests.einsum_ex_float_matmul_3x3_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4_2 GeneratedTests.equal_boolean GeneratedTests.equal_dynamic_float_nnfw GeneratedTests.exp_ diff --git a/runtime/tests/nnapi/specs/Ex/einsum_ex_float.mod.py b/runtime/tests/nnapi/specs/Ex/einsum_ex_float.mod.py deleted file mode 100644 index 6b247c8b719..00000000000 --- a/runtime/tests/nnapi/specs/Ex/einsum_ex_float.mod.py +++ /dev/null @@ -1,154 +0,0 @@ -def test(name, lhs, rhs, equation, output, lhs_data, rhs_data, output_data): - model = Model().Operation("EINSUM_EX", lhs, rhs, equation).To(output) - example = Example({ - lhs: lhs_data, - rhs: rhs_data, - output: output_data, - }, model=model, name=name) - -test( - name = 'matmul_2x2_2', - lhs = Input("input0", "TENSOR_FLOAT32", "{2, 3}"), - rhs = Input("input1", "TENSOR_FLOAT32", "{3, 2}"), - equation = Parameter("eq", "TENSOR_QUANT8_ASYMM", "{9}, 1.0, 0", - [105, 107, 44, 107, 106, 45, 62, 105, 106]), # ik,kj->ij - lhs_data=[0., 1., 2., 3., 4., 5.], - rhs_data=[0., 3., 1., 4., 2., 5.], - output=Output("output0", "TENSOR_FLOAT32", "{2, 2}"), - output_data=[5., 14., 14., 50.] 
-) - -# abc,cde->abde -dim_a = 2 -dim_b = 2 -dim_c = 4 -dim_d = 3 -dim_e = 3 - -lhs_value = [x for x in range(dim_a * dim_b * dim_c)] -rhs_value = [x for x in range(dim_c * dim_d * dim_e)] -result_value = [0 for x in range(dim_a * dim_b * dim_d * dim_e)] - -for a in range(dim_a): - for b in range(dim_b): - for d in range(dim_d): - for e in range(dim_e): - result_index = e + dim_e * (d + dim_d * (b + dim_b * a)) - - for c in range(dim_c): - lhs_index = c + dim_c * (b + dim_b * a) - rhs_index = e + dim_e * (d + dim_d * c) - result_value[result_index] = result_value[result_index] + lhs_value[lhs_index] * rhs_value[rhs_index] - -test( - name = 'matmul_3x3_4', - lhs = Input("input0", "TENSOR_FLOAT32", "{2, 2, 4}"), - rhs = Input("input1", "TENSOR_FLOAT32", "{4, 3, 3}"), - equation = Parameter("eq", "TENSOR_QUANT8_ASYMM", "{13}, 1.0, 0", - [97, 98, 99, 44, 99, 100, 101, 45, 62, 97, 98, 100, 101]), # abc,cde->abde - lhs_data=lhs_value, - rhs_data=rhs_value, - output=Output("output0", "TENSOR_FLOAT32", "{2, 2, 3, 3}"), - output_data=result_value -) - -# abc,cd->abd -dim_a = 2 -dim_b = 3 -dim_c = 4 -dim_d = 3 - -lhs_value = [x for x in range(dim_a * dim_b * dim_c)] -rhs_value = [x for x in range(dim_c * dim_d)] -result_value = [0 for x in range(dim_a * dim_b * dim_d)] - -for a in range(dim_a): - for b in range(dim_b): - for d in range(dim_d): - result_index = d + dim_d * (b + dim_b * a) - - for c in range(dim_c): - lhs_index = c + dim_c * (b + dim_b * a) - rhs_index = d + dim_d * c - result_value[result_index] = result_value[result_index] + lhs_value[lhs_index] * rhs_value[rhs_index] - -test( - name = 'matmul_3x2_3', - lhs = Input("input0", "TENSOR_FLOAT32", "{2, 3, 4}"), - rhs = Input("input1", "TENSOR_FLOAT32", "{4, 3}"), - equation = Parameter("eq", "TENSOR_QUANT8_ASYMM", "{11}, 1.0, 0", - [97, 98, 99, 44, 99, 100, 45, 62, 97, 98, 100]), # abc,cd->abd - lhs_data=lhs_value, - rhs_data=rhs_value, - output=Output("output0", "TENSOR_FLOAT32", "{2, 3, 3}"), - 
output_data=result_value -) - - -# abcd,adbe->acbe -dim_a = 2 -dim_b = 3 -dim_c = 4 -dim_d = 2 -dim_e = 4 - -lhs_value = [x for x in range(dim_a * dim_b * dim_c * dim_d)] -rhs_value = [x for x in range(dim_a * dim_d * dim_b * dim_e)] -result_value = [0 for x in range(dim_a * dim_c * dim_b * dim_e)] - -for a in range(dim_a): - for c in range(dim_c): - for b in range(dim_b): - for e in range(dim_e): - result_index = e + dim_e * (b + dim_b * (c + dim_c * a)) - - for d in range(dim_d): - lhs_index = d + dim_d * (c + dim_c * (b + dim_b * a)) - rhs_index = e + dim_e * (b + dim_b * (d + dim_d * a)) - result_value[result_index] = result_value[result_index] + lhs_value[lhs_index] * rhs_value[rhs_index] - -test( - name = 'matmul_4x4_4', - lhs = Input("input0", "TENSOR_FLOAT32", "{2, 3, 4, 2}"), - rhs = Input("input1", "TENSOR_FLOAT32", "{2, 2, 3, 4}"), - equation = Parameter("eq", "TENSOR_QUANT8_ASYMM", "{15}, 1.0, 0", - [97, 98, 99, 100, 44, 97, 100, 98, 101, 45, 62, 97, 99, 98, 101]), # abcd,adbe->acbe - lhs_data=lhs_value, - rhs_data=rhs_value, - output=Output("output0", "TENSOR_FLOAT32", "{2, 4, 3, 4}"), - output_data=result_value -) - -# abcd,aecd->aceb -dim_a = 2 -dim_b = 3 -dim_c = 2 -dim_d = 4 -dim_e = 3 - -lhs_value = [x for x in range(dim_a * dim_b * dim_c * dim_d)] -rhs_value = [x for x in range(dim_a * dim_e * dim_c * dim_d)] -result_value = [0 for x in range(dim_a * dim_c * dim_e * dim_b)] - -for a in range(dim_a): - for c in range(dim_c): - for b in range(dim_e): - for e in range(dim_b): - result_index = b + dim_b * (e + dim_e * (c + dim_c * a)) - - for d in range(dim_d): - lhs_index = d + dim_d * (c + dim_c * (b + dim_b * a)) - rhs_index = d + dim_d * (c + dim_c * (e + dim_e * a)) - result_value[result_index] = result_value[result_index] + lhs_value[lhs_index] * rhs_value[rhs_index] - -test( - name = 'matmul_4x4_4_2', - lhs = Input("input0", "TENSOR_FLOAT32", "{2, 3, 2, 4}"), - rhs = Input("input1", "TENSOR_FLOAT32", "{2, 3, 2, 4}"), - equation = 
Parameter("eq", "TENSOR_QUANT8_ASYMM", "{15}, 1.0, 0", - [97, 98, 99, 100, 44, 97, 101, 99, 100, 45, 62, 97, 99, 101, 98]), # abcd,aecd->aceb - lhs_data=lhs_value, - rhs_data=rhs_value, - output=Output("output0", "TENSOR_FLOAT32", "{2, 2, 3, 3}"), - output_data=result_value -)