From 01e3a624c950c1f9bfec1183f9244c131905a9c9 Mon Sep 17 00:00:00 2001 From: Hyeongseok Oh Date: Thu, 17 Apr 2025 10:27:49 +0900 Subject: [PATCH] [onert] Remove Einsum support This commit removes einsum support in onert and tests. ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh --- .../cker/include/cker/operation/Einsum.h | 936 ------------------ .../include/cker/operation/FusedBatchNorm.h | 3 +- .../include/cker/operation/Helper/BCast.h | 6 - .../cker/operation/Helper/MatmulBCast.h | 95 -- runtime/onert/backend/cpu/KernelGenerator.cc | 19 - runtime/onert/backend/cpu/KernelGenerator.h | 1 - runtime/onert/backend/cpu/ops/EinsumLayer.cc | 74 -- runtime/onert/backend/cpu/ops/EinsumLayer.h | 60 -- .../core/include/ir/Operations.Include.h | 1 - runtime/onert/core/include/ir/Operations.lst | 1 - .../onert/core/include/ir/operation/Einsum.h | 51 - runtime/onert/core/src/ir/operation/Einsum.cc | 31 - .../operation/UntrainableOperation.test.cc | 17 - runtime/onert/core/src/loader/BaseLoader.h | 26 - runtime/tests/nnapi/CMakeLists.txt | 2 +- .../nnapi/bridge/include/NeuralNetworksEx.h | 15 +- .../nnapi/bridge/wrapper/OperationFactory.cc | 23 - .../nnapi_gtest.skip.aarch64-android.acl_cl | 5 - .../nnapi_gtest.skip.aarch64-android.acl_neon | 5 - .../nnapi_gtest.skip.aarch64-linux.acl_cl | 5 - .../nnapi_gtest.skip.aarch64-linux.acl_neon | 5 - .../nnapi_gtest.skip.armv7l-linux.acl_cl | 5 - .../nnapi_gtest.skip.armv7l-linux.acl_neon | 5 - .../nnapi/specs/Ex/einsum_ex_float.mod.py | 154 --- 24 files changed, 4 insertions(+), 1541 deletions(-) delete mode 100644 runtime/compute/cker/include/cker/operation/Einsum.h delete mode 100644 runtime/compute/cker/include/cker/operation/Helper/MatmulBCast.h delete mode 100644 runtime/onert/backend/cpu/ops/EinsumLayer.cc delete mode 100644 runtime/onert/backend/cpu/ops/EinsumLayer.h delete mode 100644 runtime/onert/core/include/ir/operation/Einsum.h delete mode 100644 runtime/onert/core/src/ir/operation/Einsum.cc delete mode 100644 
runtime/tests/nnapi/specs/Ex/einsum_ex_float.mod.py diff --git a/runtime/compute/cker/include/cker/operation/Einsum.h b/runtime/compute/cker/include/cker/operation/Einsum.h deleted file mode 100644 index c6ceebccb91..00000000000 --- a/runtime/compute/cker/include/cker/operation/Einsum.h +++ /dev/null @@ -1,936 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __NNFW_CKER_EINSUM_H__ -#define __NNFW_CKER_EINSUM_H__ - -#include "cker/Types.h" -#include "cker/Shape.h" -#include "cker/Utils.h" - -#include "cker/operation/Helper/Tensor.h" -#include "cker/operation/Helper/MatmulBCast.h" - -#include "Transpose.h" -#include "BatchMatMul.h" - -#include -#include -#include -#include -#include - -namespace nnfw -{ -namespace cker -{ - -namespace functor -{ - -template struct StrideFunctor -{ - void operator()(const Device &d, typename TTypes::ConstTensor input, - const std::vector &strides, typename TTypes::Tensor output) - { - - Eigen::DSizes dsizes; - for (size_t d = 0; d < strides.size(); d++) - { - dsizes[d] = static_cast(strides[d]); - } - for (size_t d = strides.size(); d < N; d++) - { - dsizes[d] = 1; - } - - output.device(d) = input.stride(dsizes); - } -}; - -template struct InflateFunctor -{ - void operator()(const Device &d, typename TTypes::ConstTensor input, - const std::vector &strides, typename TTypes::Tensor output) - { - - Eigen::DSizes dsizes; - for (size_t d = 0; d < strides.size(); d++) - { - dsizes[d] = static_cast(strides[d]); - } - for (size_t d = strides.size(); d < N; d++) - { - dsizes[d] = 1; - } - - output.device(d) = input.inflate(dsizes); - } -}; - -template struct ReduceFunctor -{ - template - static void Reduce(const Device &d, OUT_T out, IN_T in, const ReductionAxes &reduction_axes, - const Reducer &reducer) - { - out.device(d) = in.reduce(reduction_axes, reducer); - } -}; - -template struct SetZeroFunctor -{ - // Computes on device "d": out = out.setZero(), - void operator()(const Device &d, typename TTypes::Flat out) - { - out.device(d) = out.constant(T(0)); - } -}; - -} // namespace functor - -using ShapeVec = std::vector; -using Labels = std::vector; -using OperandLabels = std::vector; -using LabelCounts = std::vector; -using OperandLabelCounts = std::vector; -using LabelToDimSizes = std::vector; - -// Each dimension is categorized into exactly one of five types based on -// 
whether its corresponding label is present in the input and/or the output -// subscripts. -enum DimensionType -{ - // Batch dimensions are those present in two inputs as well as the output. - // They are part of the batch dimensions during Tensor contraction. - // Such dimensions may be broadcasting dimensions (those mapping to - // ellipsis) - // or explicit batch dimensions corresponding to named axis labels. - kBroadcasting = 0, - kBatch = 1, - // Free dimensions are present in exactly one of the inputs, and also the - // output. These are non-contracted axes in the Tensor contraction. - kFree = 2, - // Contract dimensions are present in two inputs, but not the output. These - // dimensions are contracted in Tensor contraction. - kContract = 3, - // Reduce dimensions are present in exactly one input; and not in the output - // and are summed over prior to Tensor contraction. - kReduce = 4, -}; - -namespace -{ - -constexpr int kEllipsisLabel = -1; - -std::vector strSplit(std::string_view text, std::string_view delimiter) -{ - std::vector result; - - size_t start = 0; - size_t pos = 0; - - do - { - pos = text.find(delimiter, start); - if (pos == std::string::npos) - { - result.push_back(std::string(text.substr(start, text.size() - start))); - break; - } - - result.push_back(std::string(text.substr(start, pos - start))); - start = pos + delimiter.size(); - } while (pos != std::string::npos); - - return result; -} - -inline DimensionType getDimensionType(bool is_removed, bool is_unique) -{ - if (!is_removed && !is_unique) - return kBatch; - else if (!is_removed && is_unique) - return kFree; - else if (is_removed && !is_unique) - return kContract; - else // is_removed && is_unique - return kReduce; -} - -inline Shape copyShape(const Shape &shape) -{ - return Shape::ExtendedShape(shape.DimensionsCount(), shape); -} -} // namespace - -class Einsum -{ -public: - Einsum() : _prepared(false) - { - // DO NOTHING - } - - void prepare(std::string_view equation) - { - if 
(_prepared) - { - return; - } - - // Parse equation - parseEquation(equation); - _prepared = true; - } - - void operator()(std::string_view equation, const std::vector &input_shapes, - const std::vector &input_data, const Shape &output_shape, - float *output_data) - { - if (!_prepared) - { - prepare(equation); - } - - const int num_inputs = input_shapes.size(); - std::vector> inputs(num_inputs); - for (int i = 0; i < num_inputs; i++) - { - inputs[i].shape.ReplaceWith(input_shapes[i].DimensionsCount(), input_shapes[i].DimsData()); - inputs[i].buffer = input_data[i]; - } - - OperandLabels input_labels(_input_labels); - Labels output_labels(_output_labels); - std::vector label_types(_label_types); - OperandLabelCounts input_label_counts(_input_label_counts); - LabelCounts output_label_counts(_output_label_counts); - LabelToDimSizes label_to_dim_sizes; - - processDimensions(inputs, &input_labels, &output_labels, &label_types, &input_label_counts, - &output_label_counts, &label_to_dim_sizes); - - // The reduction phase (a) sums across reduction dimensions, (b) takes - // generalized diagonals, and (c) reshapes it into shape - // [(broadcasting) batch shape] + [F,C] - // where F and C denote the total (compacted) size of free and contract - // dimensions, respectively. - - OperandLabels free_labels(num_inputs); - std::vector inputs_reduced(num_inputs); - std::vector swap_free_and_contract(num_inputs); - for (int i = 0; i < num_inputs; ++i) - { - bool temp_swap_free_and_contract = false; - reduceOperand(inputs[i], label_types, input_label_counts[i], &input_labels[i], - &free_labels[i], &temp_swap_free_and_contract, &inputs_reduced[i]); - swap_free_and_contract[i] = temp_swap_free_and_contract; - } - - // After reduction, the inputs should be reshaped to Tensors suitable for - // contraction. If num_inputs is 1, the reduced input is simply forwarded to - // the output. 
- Tensor contraction_output_reshaped; - contractOperands(inputs_reduced, swap_free_and_contract, &contraction_output_reshaped); - - // Copy the batch labels from the contraction output. Recover the batch - // shape, which may have been broadcasted. - std::vector result_shape_dims(contraction_output_reshaped.shape.DimensionsCount() - 2); - - for (size_t i = 0; i < result_shape_dims.size(); i++) - { - result_shape_dims[i] = contraction_output_reshaped.shape.Dims(i); - } - - int num_labels = label_types.size(); - Labels result_labels; - // All batch dimensions should be present in the contracted result. First - // the broadcasting dimensions, then the named batch dimensions. - for (int label = 0; label < num_labels; ++label) - { - if (label_types[label] == kBroadcasting) - result_labels.push_back(label); - } - for (int label = 0; label < num_labels; ++label) - { - if (label_types[label] == kBatch) - result_labels.push_back(label); - } - for (int i = 0; i < num_inputs; ++i) - { - for (auto &&label : free_labels[i]) - { - result_labels.push_back(label); - result_shape_dims.push_back(label_to_dim_sizes[label]); - } - } - - Shape result_shape(result_shape_dims.size(), result_shape_dims.data()); - - // Reshape the contraction (or reduction) result to its expanded shape: - // [(broadcasted) batch shape] + [free shape 0] + [free shape 1]. - Tensor contraction_output; - copyFrom(contraction_output_reshaped, result_shape, &contraction_output); - - // Inflate the output if necessary. (E.g. for the equation 'i->iii' which - // may arise while computing gradient of a regular Einsum). - // TODO(anudhyan): It's possible that Eigen's contract and inflate can be - // chained here to avoid materializing an intermediate. 
- Tensor output_inflated; - strideOrInflate(contraction_output, result_labels, output_label_counts, - true /* should_inflate */, &output_inflated); - - if (output_inflated.shape.DimensionsCount() > contraction_output.shape.DimensionsCount()) - { - // We inflated the output. Modify result labels accordingly. - Labels inflated_labels; - for (auto &&label : result_labels) - { - inflated_labels.insert(inflated_labels.end(), output_label_counts[label], label); - } - result_labels.swap(inflated_labels); - } - - // Find the permutation to map the result labels to the output labels. Note - // that both the result and the final output may have the repeated labels, - // in which case the permutation preserves the left-to-right ordering. - // E.g. if result labels are [0, 0, 1] and output is [0, l, 0] then the - // permutation should be [0, 2, 1]. We also use the fact that repeated - // labels in the result are adjacent to each other. - std::vector output_permutation(output_labels.size()); - std::vector label_to_position(num_labels, -1); - for (size_t i = 0; i < result_labels.size(); ++i) - { - // Remember the position of only the leftmost result label. - if (label_to_position[result_labels[i]] == -1) - { - label_to_position[result_labels[i]] = i; - } - } - for (size_t i = 0; i < output_labels.size(); ++i) - { - output_permutation[i] = label_to_position[output_labels[i]]; - // We have found the leftmost occurrence. The next one would be adjacent. 
- label_to_position[output_labels[i]] += 1; - } - - InputTensor temp_inflated; - temp_inflated.shape.ReplaceWith(output_inflated.shape.DimensionsCount(), - output_inflated.shape.DimsData()); - temp_inflated.buffer = (reinterpret_cast(output_inflated.buffer)); - ; - - Tensor output; - transposeOperand(temp_inflated, output_permutation, &output); - - memcpy(output_data, output.buffer, output_shape.FlatSize() * sizeof(float)); - - temp_operand.clear(); - } - -private: - void parseEquation(std::string_view equation) - { - std::vector input_str; - std::string output_str; - - parseEinsumEquation(equation, input_str, output_str); - - // Temporary map from single character labels to (consecutive) integer - // labels. - std::map label_mapping; - int num_inputs = input_str.size(); - _input_labels.resize(num_inputs); - - // Map from single characters to integer labels. - for (int i = 0; i < num_inputs; ++i) - { - mapToLabels(input_str[i], _input_labels.at(i), label_mapping); - } - mapToLabels(output_str, _output_labels, label_mapping); - - // Compute counts for input and output labels. - int num_labels = label_mapping.size(); - _input_label_counts.resize(num_inputs); - _input_has_ellipsis.resize(num_inputs); - for (int i = 0; i < num_inputs; ++i) - { - _input_label_counts.at(i).resize(num_labels); - for (const int label : _input_labels.at(i)) - { - if (label != kEllipsisLabel) - _input_label_counts.at(i)[label] += 1; - else - _input_has_ellipsis.at(i) = true; - } - } - _output_label_counts.resize(num_labels); - for (const int label : _output_labels) - { - if (label != kEllipsisLabel) - _output_label_counts.at(label) += 1; - else - _output_has_ellipsis = true; - } - - // Map each label to a unique DimensionType. 
- _label_types.resize(num_labels); - for (int label = 0; label < num_labels; ++label) - { - bool removed = (_output_label_counts[label] == 0); - bool unique = - num_inputs == 1 || _input_label_counts[0][label] == 0 || _input_label_counts[1][label] == 0; - _label_types[label] = getDimensionType(removed, unique); - } - } - - void parseEinsumEquation(std::string_view &equation, std::vector &input_subscripts, - std::string &output_subscript) - { - std::vector inputs_and_output_subscripts = strSplit(equation, "->"); - if (inputs_and_output_subscripts.size() != 2) - { - throw std::runtime_error{"Einsum: Expecting exactly one '->' in einsum equation: " + - std::string(equation)}; - } - - output_subscript = inputs_and_output_subscripts[1]; - input_subscripts = strSplit(inputs_and_output_subscripts[0], ","); - if (input_subscripts.size() != 1 && input_subscripts.size() != 2) - { - throw std::runtime_error{"Einsum: Expecting 1 or 2 input subscripts in equation '" + - std::string(equation) + - "' but got: " + std::to_string(input_subscripts.size())}; - } - } - - // Maps the character labels to consecutive integers. - void mapToLabels(std::string_view subscript, Labels &labels, std::map &label_mapping) - { - for (size_t i = 0; i < subscript.size(); ++i) - { - const char label_char = subscript[i]; - if (label_char == '.') - { - labels.push_back(kEllipsisLabel); - i += 2; // Skip next 2 characters as well. 
- continue; - } - if (label_mapping.find(label_char) == label_mapping.end()) - { - const int next_label = label_mapping.size(); - label_mapping[label_char] = next_label; - } - const int mapped_label = label_mapping[label_char]; - labels.push_back(mapped_label); - } - } - - template - void processDimensions(const std::vector> &inputs, OperandLabels *input_labels, - Labels *output_labels, std::vector *label_types, - OperandLabelCounts *input_label_counts, LabelCounts *output_label_counts, - LabelToDimSizes *label_to_dim_sizes) - { - if (inputs.size() != input_labels->size()) - { - throw std::runtime_error{"Expected " + std::to_string(input_labels->size()) + - " inputs but got: " + std::to_string(inputs.size())}; - } - const int num_inputs = inputs.size(); - - // We infer the number of broadcasting dimensions by taking the maximum rank - // among the broadcasting subshapes of the input. - int max_bcast_dims = 0; - const int num_named_labels = label_types->size(); - label_to_dim_sizes->resize(num_named_labels); - for (int i = 0; i < num_inputs; ++i) - { - Labels *labels = &(*input_labels)[i]; - - if (!_input_has_ellipsis[i]) - { - if (inputs[i].shape.DimensionsCount() != ((int32_t)labels->size())) - { - throw std::runtime_error{"Expected input " + std::to_string(i) + " to have rank " + - std::to_string(labels->size()) + " but got: " + - std::to_string(inputs[i].shape.DimensionsCount())}; - } - for (size_t label_idx = 0; label_idx < labels->size(); ++label_idx) - { - const int label = (*labels)[label_idx]; - recordLabelToDimension(label, label_idx, inputs[i].shape, label_to_dim_sizes); - } - continue; - } - - // Input has an ellipsis. 
- if (inputs[i].shape.DimensionsCount() + 1 < (int32_t)labels->size()) - { - throw std::runtime_error{"Expected input " + std::to_string(i) + " to have rank at least " + - std::to_string(labels->size() - 1) + - " but got: " + std::to_string(inputs[i].shape.DimensionsCount())}; - } - int ellipsis_axis = -1; - const int num_bcast_dims = inputs[i].shape.DimensionsCount() - labels->size() + 1; - for (size_t label_idx = 0; label_idx < labels->size(); ++label_idx) - { - const int label = (*labels)[label_idx]; - if (label == kEllipsisLabel) - { - ellipsis_axis = label_idx; - continue; - } - // Current label is not an ellipsis. - const int axis = label_idx + (ellipsis_axis == -1 ? 0 : num_bcast_dims - 1); - recordLabelToDimension(label, axis, inputs[i].shape, label_to_dim_sizes); - } - // Found an ellipsis. Replace 'kEllipsisLabel' with broadcasting - // dimensions. - if (ellipsis_axis != -1) - { - insertBroadcastLabels(num_bcast_dims, num_named_labels, ellipsis_axis, labels, - &input_label_counts->at(i)); - max_bcast_dims = std::max(max_bcast_dims, num_bcast_dims); - } - } - - std::vector::iterator it_input = - std::find(_input_has_ellipsis.begin(), _input_has_ellipsis.end(), true); - if (it_input == _input_has_ellipsis.end() && !_output_has_ellipsis) - { - return; - } - // Insert broadcasting dimensions in the output labels. - auto it = std::find(output_labels->begin(), output_labels->end(), kEllipsisLabel); - if (it != output_labels->end()) - { - const int ellipsis_axis = it - output_labels->begin(); - insertBroadcastLabels(max_bcast_dims, num_named_labels, ellipsis_axis, output_labels, - output_label_counts); - } - else if (max_bcast_dims > 0) - { - std::runtime_error{"Output contains " + std::to_string(max_bcast_dims) + - " broadcasting dimension(s) but no ellipsis " + - "(...) was found in the output subscripts."}; - } - // Populate DimensionType for the new broadcasting labels. 
- label_types->resize(num_named_labels + max_bcast_dims, kBroadcasting); - } - - void recordLabelToDimension(const int32_t label, const int axis, const Shape &input_shape, - LabelToDimSizes *label_to_dim_sizes) - { - const int32_t input_dim = input_shape.Dims(axis); - // We know that label_to_dim_sizes has the size to accommodate named labels. - if (label_to_dim_sizes->at(label) != 0 && label_to_dim_sizes->at(label) != input_dim) - { - std::runtime_error{"Expected dimension " + std::to_string(label_to_dim_sizes->at(label)) + - " at axis " + std::to_string(axis) + - " of the input shaped but got dimension " + std::to_string(input_dim)}; - } - (*label_to_dim_sizes)[label] = input_dim; - } - - void insertBroadcastLabels(int num_bcast_dims, int num_named_labels, int ellipsis_axis, - Labels *labels, LabelCounts *label_counts) - { - labels->erase(labels->begin() + ellipsis_axis); - labels->insert(labels->begin() + ellipsis_axis, num_bcast_dims, 0); - std::iota(labels->begin() + ellipsis_axis, labels->begin() + ellipsis_axis + num_bcast_dims, - num_named_labels); - // Increment label counts. Since these are new labels, the count is set - // to 1. - label_counts->resize(num_named_labels + num_bcast_dims, 1); - } - - template - void reduceOperand(const InputTensor &input, const std::vector &label_types, - const LabelCounts &label_counts, Labels *labels, Labels *free_labels, - bool *swap_free_and_contract, Tensor *output) - { - // Find the permutation to transpose the input dimensions in the order of - // DimensionType; i.e. batch, free, contract and reduce dimensions. This - // makes it more convenient to invoke Reduce/Contract operations. - std::vector permutation(input.shape.DimensionsCount()); - std::iota(permutation.begin(), permutation.end(), 0); - Tensor input_transposed; - - // Check if we can avoid the transpose. We need to flip the adj_x (or adj_y) - // flag during BatchMatMul. This is an extra optimization not necessary for - // correctness. 
- if (shouldSwapFreeAndContract(*labels, label_types)) - { - *swap_free_and_contract = true; - } - else - { - std::sort(permutation.begin(), permutation.end(), [&](int i, int j) { - int label_i = (*labels)[i]; - int label_j = (*labels)[j]; - return std::tie(label_types[label_i], label_i) < std::tie(label_types[label_j], label_j); - }); - } - // Transpose the input so that DimensionTypes are in order. - transposeOperand(input, permutation, &input_transposed); - - permuteLabels(permutation, labels); - - // Take the generalized diagonal for dimensions with repeated axis labels. - Tensor input_deduped; - labels->erase(std::unique(labels->begin(), labels->end()), labels->end()); - strideOrInflate(input_transposed, *labels, label_counts, false /* should_inflate */, - &input_deduped); - - // Reshape denotes the rank-5 shape [broadcast, batch, free, contract, - // reduce] where we've compacted the dimensions of each DimensionType. - std::vector reshape(5, 1); - - // The output shape is [batch shape] + [free size, contract size] - // That is, the batch shape is preserved (for broadcasting while - // contracting) while the free dims and contract dims are compressed to one - // dimension each. - Shape output_shape; - std::vector output_shape_dims; - for (size_t label_idx = 0; label_idx < labels->size(); ++label_idx) - { - const int label = labels->at(label_idx); - int32_t dim = input_deduped.shape.Dims(label_idx); - if (label_types[label] == kBroadcasting || label_types[label] == kBatch) - { - output_shape_dims.push_back(dim); - } - else if (label_types[label] == kFree) - { - free_labels->push_back(label); - } - reshape[label_types[label]] *= dim; - } - - if (*swap_free_and_contract) - std::swap(reshape[kFree], reshape[kContract]); - - output_shape_dims.push_back(reshape[kFree]); - output_shape_dims.push_back(reshape[kContract]); - - output_shape.ReplaceWith(output_shape_dims.size(), output_shape_dims.data()); - - if (reshape[kReduce] == 1) - { // No need to actually reduce. 
- return copyFrom(input_deduped, output_shape, output); - } - - allocateTemp(output_shape, output); - - using Reducer = Eigen::internal::SumReducer; - using Index = typename TTypes::Tensor::Index; - - const Eigen::ThreadPoolDevice &device = *eigen_support::GetThreadPoolDevice(); - - // Reduce along the last axis (i.e axis 1) of the rank-2 Tensor. - const int32_t output_size = - reshape[kBroadcasting] * reshape[kBatch] * reshape[kFree] * reshape[kContract]; - functor::ReduceFunctor::Reduce( - device, output->shaped({output_size}), - input_deduped.shaped({output_size, reshape[kReduce]}), Eigen::array({1}), - Reducer()); - } - - bool shouldSwapFreeAndContract(const Labels &labels, - const std::vector &label_types) - { - // Check that ordering is according to dimension type, with the role of - // free and contract dimensions swapped. - std::vector remap = {0, 1, 3, 2, 4}; - for (size_t i = 0; i + 1 < labels.size(); ++i) - { - const int dimtype_a = remap[label_types[labels[i]]]; - const int dimtype_b = remap[label_types[labels[i + 1]]]; - if (dimtype_a > dimtype_b || (dimtype_a == dimtype_b && labels[i] > labels[i + 1])) - { - return false; - } - } - return true; - } - - template - void transposeOperand(const InputTensor &input, const std::vector &permutation, - Tensor *output) - { - if (!shouldTranspose(input.shape, permutation)) - { - copyFrom(input, input.shape, output); - return; - } - Shape transposed_shape(input.shape.DimensionsCount()); - for (int i = 0; i < input.shape.DimensionsCount(); ++i) - { - transposed_shape.SetDim(i, input.shape.Dims(permutation[i])); - } - // For empty Tensors, just change the shape. E.g. we may need to transpose - // from shape [1, 0, 5] to [5, 1, 0]. 
- if (input.shape.FlatSize() == 0) - { - copyFrom(input, transposed_shape, output); - return; - } - - temp_operand.emplace_back(std::make_unique(transposed_shape.FlatSize())); - T *new_buffer = temp_operand.back().get(); - - TransposeParams transpose_params; - transpose_params.perm_count = permutation.size(); - for (size_t i = 0; i < permutation.size(); i++) - { - transpose_params.perm[i] = permutation[i]; - } - - Transpose(transpose_params, input.shape, input.buffer, transposed_shape, new_buffer); - - output->shape.ReplaceWith(transposed_shape.DimensionsCount(), transposed_shape.DimsData()); - output->buffer = new_buffer; - } - - bool shouldTranspose(const Shape &input_shape, const std::vector &permutation) - { - if (input_shape.DimensionsCount() < 2) - return false; - for (size_t i = 0; i < permutation.size(); ++i) - { - if (permutation[i] != (int32_t)i) - return true; - } - return false; - } - - template - void copyFrom(const InputTensor &input, const Shape &shape, Tensor *output) - { - Tensor temp_tensor; - temp_tensor.shape.ReplaceWith(input.shape.DimensionsCount(), input.shape.DimsData()); - temp_operand.emplace_back(std::make_unique(input.shape.FlatSize())); - temp_tensor.buffer = temp_operand.back().get(); - memcpy(temp_tensor.buffer, input.buffer, input.shape.FlatSize() * sizeof(float)); - - copyFrom(temp_tensor, shape, output); - } - - void copyFrom(const Tensor &input, const Shape &shape, Tensor *output) - { - if (output->copyFrom(input, shape)) - return; - - throw std::runtime_error{"Einsum: Encountered error while reshaping a Tensor"}; - } - - // Permutes the labels according to the given permutation. 
- void permuteLabels(const std::vector &permutation, Labels *labels) - { - Labels permuted_labels(labels->size()); - for (size_t i = 0; i < labels->size(); ++i) - { - permuted_labels[i] = (*labels)[permutation[i]]; - } - labels->swap(permuted_labels); - } - - // If there are repeated labels in either the input or output, then this - // strides the input (e.g. iii->i) or inflates it (e.g. i->iii), respectively. - template - void strideOrInflate(const Tensor &input, const Labels &labels, const LabelCounts &label_counts, - const bool should_inflate, Tensor *output) - { - // Return early if there are no repeated indices. - if (std::all_of(label_counts.begin(), label_counts.end(), [](int c) { return c <= 1; })) - { - return copyFrom(input, input.shape, output); - } - // We reshape so that each repeated label is compressed to one dimension. - // E.g. For iiij -> ij, The shape [3, 3, 3, 5] would be compressed to [27, - // 5]. Striding appropriately (in this case with strides 14 (=1+3+9) and 1) - // recovers the generalized diagonal of shape [3, 5]. - std::vector reshape; - std::vector strides; - // Strided and inflated shapes correspond to input and output shapes, - // respectively, should_inflate is true (vice-versa if should_inflate is - // false). E.g. they are [3, 5] and [3, 3, 3, 5] in the above example. - Shape strided_shape; - Shape inflated_shape; - std::vector strided_shape_dims; - std::vector inflated_shape_dims; - for (auto &&label : labels) - { - const int32_t count = label_counts[label]; - const int current_axis = - should_inflate ? strided_shape_dims.size() : inflated_shape_dims.size(); - const int32_t dim = input.shape.Dims(current_axis); - strided_shape_dims.push_back(dim); - inflated_shape_dims.insert(inflated_shape_dims.end(), count, dim); - const int32_t reshape_dim = std::pow(dim, count); - reshape.push_back(reshape_dim); - // While taking the d-diagonal in a rank k Tensor, we take d - // equally-spaced elements including the first and last element. 
Then, (k - // - 1) * stride = d^k - 1, or, stride = (d^k - 1)/(d - 1). - const int32_t stride = (dim > 1 && count > 1) ? (reshape_dim - 1) / (dim - 1) : 1; - strides.push_back(stride); - } - - strided_shape.ReplaceWith(strided_shape_dims.size(), strided_shape_dims.data()); - inflated_shape.ReplaceWith(inflated_shape_dims.size(), inflated_shape_dims.data()); - - Shape output_shape = Shape(should_inflate ? inflated_shape : strided_shape); - - output->shape.ReplaceWith(output_shape.DimensionsCount(), output_shape.DimsData()); - temp_operand.emplace_back(std::make_unique(output_shape.FlatSize())); - output->buffer = temp_operand.back().get(); - - const Eigen::ThreadPoolDevice &device = *eigen_support::GetThreadPoolDevice(); - - switch (reshape.size()) - { -#define NDIMS_CASE(N) \ - case N: \ - { \ - if (should_inflate) \ - { \ - auto output_map = output->shaped(reshape); \ - auto input_map = input.shaped(strided_shape_dims); \ - functor::InflateFunctor()(device, input_map, strides, \ - output_map); \ - } \ - else \ - { \ - auto input_map = input.shaped(reshape); \ - auto output_map = output->shaped(strided_shape_dims); \ - functor::StrideFunctor()(device, input_map, strides, \ - output_map); \ - } \ - } \ - break; - NDIMS_CASE(1); - NDIMS_CASE(2); - NDIMS_CASE(3); - NDIMS_CASE(4); - NDIMS_CASE(5); - NDIMS_CASE(6); - default: - throw std::runtime_error{"Unsupported rank: " + std::to_string(reshape.size()) + - " while handling repeated indices. Up to rank 6 is supported."}; -#undef NDIMS_CASE - } - } - - void allocateTemp(const Shape &shape, Tensor *output) - { - output->shape.ReplaceWith(shape.DimensionsCount(), shape.DimsData()); - temp_operand.emplace_back(std::make_unique(shape.FlatSize())); - output->buffer = temp_operand.back().get(); - } - - // Contracts the inputs along the last axis. (or the second last if the - // corresponding value of swap_free_and_contract is true). The batch - // dimensions are broadcast to the output shape. 
- // TODO(anudhyan): Factor this function into a BatchMatMul functor and support - // transpose_x and transpose_y attributes (in addition to adj_x and adj_y). - // Also, the BatchMatMul might devolve into a component-wise multiplication - // when the matrix shape is [1,1]; in this case BatchMatMul functor would be - // very inefficient. The functor should detect if this is the case and perform - // componentwise multiplication functor instead. - void contractOperands(std::vector &inputs, std::vector &swap_free_and_contract, - Tensor *output) - { - if (inputs.size() == 1) - return copyFrom(inputs[0], inputs[0].shape, output); - - MatMulBCast bcast(inputs[0].shape, inputs[1].shape); - if (!bcast.IsValid()) - { - throw std::runtime_error{"Einsum: Invalid broadcasting dimensions"}; - } - - Tensor lhs; - reshapeToRank3(inputs[0], bcast.x_batch_size(), &lhs); - Tensor rhs; - reshapeToRank3(inputs[1], bcast.y_batch_size(), &rhs); - Shape old_output_shape = bcast.output_batch_shape(); - Shape output_shape(static_cast(old_output_shape.DimensionsCount() + inputs.size())); - for (int i = 0; i < old_output_shape.DimensionsCount(); i++) - { - output_shape.SetDim(i, old_output_shape.Dims(i)); - } - - for (size_t i = 0; i < inputs.size(); ++i) - { - const int32_t free_axis = - inputs[i].shape.DimensionsCount() - (swap_free_and_contract[i] ? 
1 : 2); - output_shape.SetDim(i + old_output_shape.DimensionsCount(), inputs[i].shape.Dims(free_axis)); - } - bool adj_x = swap_free_and_contract[0]; - bool adj_y = !swap_free_and_contract[1]; - - allocateTemp(output_shape, output); - - const Eigen::ThreadPoolDevice &device = *eigen_support::GetThreadPoolDevice(); - - if (lhs.shape.FlatSize() == 0 || rhs.shape.FlatSize() == 0) - { - functor::SetZeroFunctor set_zero; - set_zero(device, - typename TTypes::Tensor(output->base(), output->shape.FlatSize())); - return; - } - - Tensor output_reshaped; - reshapeToRank3(*output, bcast.output_batch_size(), &output_reshaped); - - // LaunchBatchMatMul::Launch(lhs, rhs, adj_x, adj_y, bcast, &output_reshaped); - BatchMatMul batchMatMul; - // Set rhs is not constant: don't use optimization - batchMatMul.prepare(lhs.shape, rhs.shape, adj_x, adj_y, false); - batchMatMul(lhs.shape, lhs.base(), rhs.shape, rhs.base(), adj_x, adj_y, - output_reshaped.shape, output_reshaped.base()); - } - - void reshapeToRank3(const Tensor &input, int batch_size, Tensor *output) - { - const int rank = input.shape.DimensionsCount(); - Shape output_shape({batch_size, input.shape.Dims(rank - 2), input.shape.Dims(rank - 1)}); - copyFrom(input, output_shape, output); - } - -private: - bool _prepared; - - OperandLabels _input_labels; - Labels _output_labels; - std::vector _label_types; - OperandLabelCounts _input_label_counts; - LabelCounts _output_label_counts; - std::vector _input_has_ellipsis; - bool _output_has_ellipsis = false; - - std::vector> temp_operand; -}; - -} // namespace cker -} // namespace nnfw - -#endif // __NNFW_CKER_EINSUM_H__ diff --git a/runtime/compute/cker/include/cker/operation/FusedBatchNorm.h b/runtime/compute/cker/include/cker/operation/FusedBatchNorm.h index 88d48fc0114..74bda43f746 100644 --- a/runtime/compute/cker/include/cker/operation/FusedBatchNorm.h +++ b/runtime/compute/cker/include/cker/operation/FusedBatchNorm.h @@ -23,7 +23,6 @@ #include "cker/Utils.h" #include 
"cker/operation/Helper/Tensor.h" -#include "cker/operation/Helper/MatmulBCast.h" #include "Transpose.h" #include "BatchMatMul.h" @@ -146,7 +145,7 @@ class FusedBatchNorm if (output->copyFrom(input, shape)) return; - throw std::runtime_error{"Einsum: Encountered error while reshaping a Tensor"}; + throw std::runtime_error{"FusedBatchNorm: Encountered error while reshaping a Tensor"}; } private: diff --git a/runtime/compute/cker/include/cker/operation/Helper/BCast.h b/runtime/compute/cker/include/cker/operation/Helper/BCast.h index 211db98cee0..c7173757617 100644 --- a/runtime/compute/cker/include/cker/operation/Helper/BCast.h +++ b/runtime/compute/cker/include/cker/operation/Helper/BCast.h @@ -18,12 +18,6 @@ #ifndef __NNFW_CKER_HELPER_BCAST_H__ #define __NNFW_CKER_HELPER_BCAST_H__ -/** - * ToDo : This file will be moved into upper folder when integrate with other - * custom operations. - * And It should merged with EinsumHelper's BCast. - **/ - #include "cker/Shape.h" #include "cker/eigen/EigenSupport.h" diff --git a/runtime/compute/cker/include/cker/operation/Helper/MatmulBCast.h b/runtime/compute/cker/include/cker/operation/Helper/MatmulBCast.h deleted file mode 100644 index b7d63943304..00000000000 --- a/runtime/compute/cker/include/cker/operation/Helper/MatmulBCast.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __NNFW_CKER_EINSUM_HELPER_MATMUL_BCAST_H__ -#define __NNFW_CKER_EINSUM_HELPER_MATMUL_BCAST_H__ - -#include -#include -#include - -#include "BCast.h" -#include "cker/Shape.h" - -namespace nnfw -{ -namespace cker -{ - -// Simple wrapper over BCast specialized for MatMul. -// Provides utilities for broadcasting across batch dimensions for binary -// MatMul-like operations. - -// Fix: Use Shape directly instead of Vec -class MatMulBCast -{ -public: - MatMulBCast(Shape &shape_x, Shape &shape_y) - { - if (shape_x.DimensionsCount() < 2 || shape_y.DimensionsCount() < 2) - return; - - std::vector x; - std::vector y; - - x.resize(shape_x.DimensionsCount() - 2); - y.resize(shape_y.DimensionsCount() - 2); - - for (size_t i = 0; i < x.size(); i++) - { - x[i] = shape_x.Dims(i); - } - for (size_t i = 0; i < y.size(); i++) - { - y[i] = shape_y.Dims(i); - } - - _batch_bcast = std::make_unique(std::move(x), std::move(y)); - if (!_batch_bcast->IsValid()) - return; - - const auto &x_reshaped = _batch_bcast->x_reshape(); - const auto &y_reshaped = _batch_bcast->y_reshape(); - auto output_shape = _batch_bcast->output_shape(); - - _x_batch_size = std::accumulate(x_reshaped.cbegin(), x_reshaped.cend(), INT32_C(1), - std::multiplies()); - _y_batch_size = std::accumulate(y_reshaped.cbegin(), y_reshaped.cend(), INT32_C(1), - std::multiplies()); - _output_shape.ReplaceWith(output_shape.size(), output_shape.data()); - _output_batch_size = _output_shape.FlatSize(); - } - - bool IsValid() const { return (_batch_bcast != nullptr) && _batch_bcast->IsValid(); } - int32_t x_batch_size() const { return _x_batch_size; } - int32_t y_batch_size() const { return _y_batch_size; } - int32_t output_batch_size() const { return _output_batch_size; } - const Shape &output_batch_shape() const { return _output_shape; } - -private: - std::unique_ptr _batch_bcast; - - int32_t _x_batch_size; - 
int32_t _y_batch_size; - Shape _output_shape; - int32_t _output_batch_size; -}; - -} // namespace cker -} // namespace nnfw - -#endif // __NNFW_CKER_EINSUM_HELPER_MATMUL_BCAST_H__ diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc index a4a8459c129..7030c759a2a 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.cc +++ b/runtime/onert/backend/cpu/KernelGenerator.cc @@ -25,7 +25,6 @@ #include "ops/ConvolutionLayer.h" #include "ops/DepthToSpaceLayer.h" #include "ops/DepthwiseConvolutionLayer.h" -#include "ops/EinsumLayer.h" #include "ops/ElementwiseActivationLayer.h" #include "ops/ElementwiseBinaryLayer.h" #include "ops/ElementwiseUnaryLayer.h" @@ -602,24 +601,6 @@ void KernelGenerator::visit(const ir::operation::OneHot &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Einsum &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - - auto output_tensor = _tensor_reg->getPortableTensor(ofm_index); - std::vector input_tensors; - for (const auto &ifm_idx : node.getInputs()) - input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx)); - - const auto &equation = node.param().equation; - - auto fn = std::make_unique(); - - fn->configure(input_tensors, equation, output_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::Custom &node) { auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq, diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h index b8ed5cb0a4a..5ead87e61f6 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.h +++ b/runtime/onert/backend/cpu/KernelGenerator.h @@ -52,7 +52,6 @@ class KernelGenerator : public basic::KernelGeneratorBase void visit(const ir::operation::Custom &node) override; void visit(const ir::operation::DepthToSpace &) override; void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::Einsum &) 
override; void visit(const ir::operation::ElementwiseActivation &) override; void visit(const ir::operation::ElementwiseBinary &) override; void visit(const ir::operation::ElementwiseUnary &) override; diff --git a/runtime/onert/backend/cpu/ops/EinsumLayer.cc b/runtime/onert/backend/cpu/ops/EinsumLayer.cc deleted file mode 100644 index 14c4ccca650..00000000000 --- a/runtime/onert/backend/cpu/ops/EinsumLayer.cc +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "EinsumLayer.h" - -#include - -namespace onert::backend::cpu::ops -{ - -EinsumLayer::EinsumLayer() - : _inputs(), _output(nullptr), _equation(), _einsum_kernel(new nnfw::cker::Einsum()) -{ - // DO NOTHING -} - -EinsumLayer::~EinsumLayer() = default; - -void EinsumLayer::einsumFloat32() -{ - uint32_t num_inputs = _inputs.size(); - nnfw::cker::Einsum &kernel = *_einsum_kernel; - - kernel.prepare(_equation); - - std::vector inputShapes; - std::vector inputFloatPtrs; - - for (uint32_t i = 0; i < num_inputs; i++) - { - inputShapes.emplace_back(getShape(_inputs[i])); - inputFloatPtrs.emplace_back(getBuffer(_inputs[i])); - } - - kernel(_equation, inputShapes, inputFloatPtrs, getShape(_output), getBuffer(_output)); -} - -void EinsumLayer::run() -{ - if (_output->data_type() == OperandType::FLOAT32) - { - einsumFloat32(); - } - else - { - throw std::runtime_error{"Einsum: unsupported data type"}; - } -} - -void EinsumLayer::configure(const std::vector &inputs, - std::string equation, IPortableTensor *output) -{ - assert(inputs.size() > 0); - assert(output != nullptr); - - _inputs = inputs; - _equation = equation; - _output = output; -} - -} // namespace onert::backend::cpu::ops diff --git a/runtime/onert/backend/cpu/ops/EinsumLayer.h b/runtime/onert/backend/cpu/ops/EinsumLayer.h deleted file mode 100644 index e05aad4661a..00000000000 --- a/runtime/onert/backend/cpu/ops/EinsumLayer.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_EINSUM_LAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_EINSUM_LAYER_H__ - -#include -#include "OperationUtils.h" - -#include -#include -#include - -namespace nnfw::cker -{ -class Einsum; -} // namespace nnfw::cker - -namespace onert::backend::cpu::ops -{ - -class EinsumLayer : public ::onert::exec::IFunction -{ -public: - EinsumLayer(); - ~EinsumLayer(); - -public: - void einsumFloat32(); - - void configure(const std::vector &inputs, std::string equation, - IPortableTensor *output); - - void run() override; - -private: - std::vector _inputs; - IPortableTensor *_output; - - std::string _equation; - - std::unique_ptr _einsum_kernel; -}; - -} // namespace onert::backend::cpu::ops - -#endif // __ONERT_BACKEND_CPU_OPS_EINSUM_LAYER_H__ diff --git a/runtime/onert/core/include/ir/Operations.Include.h b/runtime/onert/core/include/ir/Operations.Include.h index 303ee9ae002..82024217027 100644 --- a/runtime/onert/core/include/ir/Operations.Include.h +++ b/runtime/onert/core/include/ir/Operations.Include.h @@ -33,7 +33,6 @@ #include "ir/operation/Custom.h" #include "ir/operation/DepthToSpace.h" #include "ir/operation/DepthwiseConv2D.h" -#include "ir/operation/Einsum.h" #include "ir/operation/ElementwiseActivation.h" #include "ir/operation/ElementwiseBinary.h" #include "ir/operation/ElementwiseUnary.h" diff --git a/runtime/onert/core/include/ir/Operations.lst b/runtime/onert/core/include/ir/Operations.lst index c37de7a8d46..0b87d2c76b9 100644 --- a/runtime/onert/core/include/ir/Operations.lst +++ b/runtime/onert/core/include/ir/Operations.lst @@ -36,7 +36,6 @@ OP(ConvertFp32ToFp16) OP(Custom) OP(DepthToSpace) OP(DepthwiseConv2D) -OP(Einsum) OP(ElementwiseActivation) OP(ElementwiseBinary) OP(ElementwiseUnary) diff --git a/runtime/onert/core/include/ir/operation/Einsum.h b/runtime/onert/core/include/ir/operation/Einsum.h deleted file mode 
100644 index a1faa7a1c15..00000000000 --- a/runtime/onert/core/include/ir/operation/Einsum.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_EINSUM_H__ -#define __ONERT_IR_OPERATION_EINSUM_H__ - -#include "ir/Operation.h" -#include "ir/InternalType.h" - -namespace onert::ir::operation -{ - -class Einsum : public Operation -{ -public: - struct Param - { - std::string equation; - }; - -public: - Einsum(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param ¶m); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Einsum; } - -public: - const Param ¶m() const { return _param; } - -private: - Param _param; -}; - -} // namespace onert::ir::operation - -#endif // __ONERT_IR_OPERATION_EINSUM_H__ diff --git a/runtime/onert/core/src/ir/operation/Einsum.cc b/runtime/onert/core/src/ir/operation/Einsum.cc deleted file mode 100644 index 50f6a8d1869..00000000000 --- a/runtime/onert/core/src/ir/operation/Einsum.cc +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/Einsum.h" -#include "ir/OperationVisitor.h" - -namespace onert::ir::operation -{ - -void Einsum::accept(OperationVisitor &v) const { v.visit(*this); } - -Einsum::Einsum(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param ¶m) - : Operation{OperandConstraint::createAtLeast(1u), inputs, outputs}, _param{param} -{ -} - -} // namespace onert::ir::operation diff --git a/runtime/onert/core/src/ir/train/operation/UntrainableOperation.test.cc b/runtime/onert/core/src/ir/train/operation/UntrainableOperation.test.cc index 4ae72668dd9..a0eb5d4afdb 100644 --- a/runtime/onert/core/src/ir/train/operation/UntrainableOperation.test.cc +++ b/runtime/onert/core/src/ir/train/operation/UntrainableOperation.test.cc @@ -164,14 +164,6 @@ operation::DetectionPostProcess generateDetectionPostProcess() param}; } -operation::Einsum generateEinsum() -{ - operation::Einsum::Param param; - param.equation = ""; - - return operation::Einsum{OperandIndexSequence{1}, OperandIndexSequence{0}, param}; -} - operation::ElementwiseActivation generateElementwiseActivation() { operation::ElementwiseActivation::Param param; @@ -662,9 +654,6 @@ TEST(UntrainableOperation, testAllOps) const auto detection = generateDetectionPostProcess(); verifyOp(detection); - const auto einsum = generateEinsum(); - verifyOp(einsum); - const auto activation = generateElementwiseActivation(); verifyOp(activation); @@ -942,12 +931,6 @@ TEST(UntrainableOperation, neg_TrainableOperationVisitor) EXPECT_ANY_THROW(visitor.invoke(*untrainable)); } - { 
- const auto einsum = generateEinsum(); - auto untrainable = generateUntrainableOperation(einsum); - EXPECT_ANY_THROW(visitor.invoke(*untrainable)); - } - { const auto activation = generateElementwiseActivation(); auto untrainable = generateUntrainableOperation(activation); diff --git a/runtime/onert/core/src/loader/BaseLoader.h b/runtime/onert/core/src/loader/BaseLoader.h index 4f5083d33be..919f74866a6 100644 --- a/runtime/onert/core/src/loader/BaseLoader.h +++ b/runtime/onert/core/src/loader/BaseLoader.h @@ -138,7 +138,6 @@ template class BaseLoader void loadCustom(const Operator *op, ir::Graph &subg); void loadDepthToSpace(const Operator *op, ir::Graph &subg); void loadDepthwiseConv2D(const Operator *op, ir::Graph &subg); - void loadEinsum(const Operator *op, ir::Graph &subg); void loadElementwiseActivation(const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type, float alpha = 0.f, float beta = 0.f); @@ -1124,7 +1123,6 @@ void BaseLoader::loadCustom(const Operator *op, ir::Graph &subg) ReduceAll, MatrixBandPart, BatchMatMul, - Einsum, BroadcastTo, FusedBatchNorm, StatelessRandomUniform, @@ -1138,7 +1136,6 @@ void BaseLoader::loadCustom(const Operator *op, ir::Graph &subg) {"All", BuiltinOP::ReduceAll}, {"MatrixBandPart", BuiltinOP::MatrixBandPart}, {"BatchMatMulV2", BuiltinOP::BatchMatMul}, - {"Einsum", BuiltinOP::Einsum}, {"FusedBatchNormV3", BuiltinOP::FusedBatchNorm}, {"BroadcastTo", BuiltinOP::BroadcastTo}, {"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform}, @@ -1164,9 +1161,6 @@ void BaseLoader::loadCustom(const Operator *op, ir::Graph &subg) case BuiltinOP::BatchMatMul: loadBatchMatMul(op, subg); break; - case BuiltinOP::Einsum: - loadEinsum(op, subg); - break; case BuiltinOP::BroadcastTo: loadOperationTo(op, subg); break; @@ -1307,26 +1301,6 @@ void BaseLoader::loadComparison(const Operator *op, ir::Graph &sub loadOperationTo(op, subg, param); } -template -void BaseLoader::loadEinsum(const Operator *op, ir::Graph 
&subg) -{ - ir::operation::Einsum::Param param; - if (op->custom_options() == nullptr) - { - throw std::runtime_error{"Einsum: empty equation"}; - } - else - { - const auto attr_map = getCustomOpAttrMap(op); - param.equation = attr_map["equation"].ToString(); - } - - const auto es = loadOperationTo(op, subg, param); - if (es->getInputs().size() != 2) - { - throw std::runtime_error{"Einsum: NYI input - only support two inputs"}; - } -} template void BaseLoader::loadFusedBatchNorm(const Operator *op, ir::Graph &subg) { diff --git a/runtime/tests/nnapi/CMakeLists.txt b/runtime/tests/nnapi/CMakeLists.txt index cd6ee944cfc..afc98a69f0f 100644 --- a/runtime/tests/nnapi/CMakeLists.txt +++ b/runtime/tests/nnapi/CMakeLists.txt @@ -12,7 +12,7 @@ set(GENERATED_CPPS "${CMAKE_CURRENT_SOURCE_DIR}/src/generated/all_generated_V1_2 # Skip script for generate test and use pre-generated test if (GENERATE_RUNTIME_NNAPI_TESTS) set(GENERATOR_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/specs/generate_test.sh") - file(GLOB_RECURSE TEST_SPECS "${CMAKE_CURRENT_SOURCE_DIR}/specs/*.mod.py") + file(GLOB_RECURSE TEST_SPECS CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/specs/*.mod.py") add_custom_command(OUTPUT ${GENERATED_CPPS} COMMAND ${GENERATOR_SCRIPT} diff --git a/runtime/tests/nnapi/bridge/include/NeuralNetworksEx.h b/runtime/tests/nnapi/bridge/include/NeuralNetworksEx.h index f0387995df3..cf06b95ae60 100644 --- a/runtime/tests/nnapi/bridge/include/NeuralNetworksEx.h +++ b/runtime/tests/nnapi/bridge/include/NeuralNetworksEx.h @@ -506,21 +506,10 @@ typedef enum ANEURALNETWORKS_MATRIX_BAND_PART_EX = 50036, /** - * Tensor contraction over specified indices and outer product * - * Supported tensor {@link OperandCode}: - * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} - * - * Supported tensor rank: up to 4 - * - * Inputs: - * * 0 ~ n-1: The list of n input tensors. - * * 1: An 1-D tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}. Each element represent - * equation character. 
- * Always scalar is 1.0 and zeroPoint is 0 + * IMPORTANT NOTICE: + * ANEURALNETWORKS_EINSUM_EX operation is DEPRECATED - * - * Outputs: - * * 0: An output tensor. */ ANEURALNETWORKS_EINSUM_EX = 50037, diff --git a/runtime/tests/nnapi/bridge/wrapper/OperationFactory.cc b/runtime/tests/nnapi/bridge/wrapper/OperationFactory.cc index ba739f6180c..877df4759ce 100644 --- a/runtime/tests/nnapi/bridge/wrapper/OperationFactory.cc +++ b/runtime/tests/nnapi/bridge/wrapper/OperationFactory.cc @@ -1566,29 +1566,6 @@ OperationFactory::OperationFactory() return new operation::BatchMatMul{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_EINSUM_EX] = [](const OperationFactory::Param &init_param, - Operands &operands) { - // Each input should be interpreted as follows: - // - // 0....n - 1 -> n Input Tensors Index - // n -> equation - assert(init_param.input_count >= 1 && init_param.output_count == 1); - - OperandIndexSequence inputs; - for (uint32_t n = 0; n < init_param.input_count - 1; ++n) - { - inputs.append(OperandIndex{init_param.inputs[n]}); - } - OperandIndexSequence outputs{init_param.outputs[0]}; - - operation::Einsum::Param param; - const OperandIndex equation_index{init_param.inputs[init_param.input_count - 1]}; - std::vector equation_vector = operands.at(equation_index).asVector(); - param.equation = std::string(equation_vector.begin(), equation_vector.end()); - - return new operation::Einsum{inputs, outputs, param}; - }; - // 0 -> Input Tensor Index // 1 -> int32, int64, An 1-D int tensor Index _map[ANEURALNETWORKS_BROADCAST_TO_EX] = createSimpleBinaryOp; diff --git a/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_cl b/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_cl index 4e4d68871b9..5fa03711e44 100644 --- a/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_cl +++ b/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_cl @@ -59,11 +59,6 @@ GeneratedTests.dequantize_v1_2 GeneratedTests.dequantize_v1_2_zero_sized
GeneratedTests.dequantize_v1_2_zero_sized_float16 GeneratedTests.div_dynamic_nnfw -GeneratedTests.einsum_ex_float_matmul_2x2_2 -GeneratedTests.einsum_ex_float_matmul_3x2_3 -GeneratedTests.einsum_ex_float_matmul_3x3_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4_2 GeneratedTests.equal_dynamic_float_nnfw GeneratedTests.exp_ GeneratedTests.exp_dynamic_nnfw diff --git a/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_neon b/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_neon index 9e337bc7eba..042e8f71ced 100644 --- a/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_neon +++ b/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-android.acl_neon @@ -58,11 +58,6 @@ GeneratedTests.dequantize_v1_2 GeneratedTests.dequantize_v1_2_zero_sized GeneratedTests.dequantize_v1_2_zero_sized_float16 GeneratedTests.div_dynamic_nnfw -GeneratedTests.einsum_ex_float_matmul_2x2_2 -GeneratedTests.einsum_ex_float_matmul_3x2_3 -GeneratedTests.einsum_ex_float_matmul_3x3_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4_2 GeneratedTests.equal_boolean GeneratedTests.equal_dynamic_float_nnfw GeneratedTests.exp_ diff --git a/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_cl b/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_cl index 4e4d68871b9..5fa03711e44 100644 --- a/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_cl +++ b/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_cl @@ -59,11 +59,6 @@ GeneratedTests.dequantize_v1_2 GeneratedTests.dequantize_v1_2_zero_sized GeneratedTests.dequantize_v1_2_zero_sized_float16 GeneratedTests.div_dynamic_nnfw -GeneratedTests.einsum_ex_float_matmul_2x2_2 -GeneratedTests.einsum_ex_float_matmul_3x2_3 -GeneratedTests.einsum_ex_float_matmul_3x3_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4_2 GeneratedTests.equal_dynamic_float_nnfw GeneratedTests.exp_ GeneratedTests.exp_dynamic_nnfw diff 
--git a/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_neon b/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_neon index 9e337bc7eba..042e8f71ced 100644 --- a/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_neon +++ b/runtime/tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_neon @@ -58,11 +58,6 @@ GeneratedTests.dequantize_v1_2 GeneratedTests.dequantize_v1_2_zero_sized GeneratedTests.dequantize_v1_2_zero_sized_float16 GeneratedTests.div_dynamic_nnfw -GeneratedTests.einsum_ex_float_matmul_2x2_2 -GeneratedTests.einsum_ex_float_matmul_3x2_3 -GeneratedTests.einsum_ex_float_matmul_3x3_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4_2 GeneratedTests.equal_boolean GeneratedTests.equal_dynamic_float_nnfw GeneratedTests.exp_ diff --git a/runtime/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_cl b/runtime/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_cl index 854d6ac801a..46e6862b13f 100644 --- a/runtime/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_cl +++ b/runtime/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_cl @@ -59,11 +59,6 @@ GeneratedTests.dequantize_v1_2 GeneratedTests.dequantize_v1_2_zero_sized GeneratedTests.dequantize_v1_2_zero_sized_float16 GeneratedTests.div_dynamic_nnfw -GeneratedTests.einsum_ex_float_matmul_2x2_2 -GeneratedTests.einsum_ex_float_matmul_3x2_3 -GeneratedTests.einsum_ex_float_matmul_3x3_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4_2 GeneratedTests.equal_dynamic_float_nnfw GeneratedTests.exp_ GeneratedTests.exp_dynamic_nnfw diff --git a/runtime/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon b/runtime/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon index f636f551009..881a3b63ec9 100644 --- a/runtime/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon +++ b/runtime/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon @@ -58,11 +58,6 @@ GeneratedTests.dequantize_v1_2 GeneratedTests.dequantize_v1_2_zero_sized 
GeneratedTests.dequantize_v1_2_zero_sized_float16 GeneratedTests.div_dynamic_nnfw -GeneratedTests.einsum_ex_float_matmul_2x2_2 -GeneratedTests.einsum_ex_float_matmul_3x2_3 -GeneratedTests.einsum_ex_float_matmul_3x3_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4 -GeneratedTests.einsum_ex_float_matmul_4x4_4_2 GeneratedTests.equal_boolean GeneratedTests.equal_dynamic_float_nnfw GeneratedTests.exp_ diff --git a/runtime/tests/nnapi/specs/Ex/einsum_ex_float.mod.py b/runtime/tests/nnapi/specs/Ex/einsum_ex_float.mod.py deleted file mode 100644 index 6b247c8b719..00000000000 --- a/runtime/tests/nnapi/specs/Ex/einsum_ex_float.mod.py +++ /dev/null @@ -1,154 +0,0 @@ -def test(name, lhs, rhs, equation, output, lhs_data, rhs_data, output_data): - model = Model().Operation("EINSUM_EX", lhs, rhs, equation).To(output) - example = Example({ - lhs: lhs_data, - rhs: rhs_data, - output: output_data, - }, model=model, name=name) - -test( - name = 'matmul_2x2_2', - lhs = Input("input0", "TENSOR_FLOAT32", "{2, 3}"), - rhs = Input("input1", "TENSOR_FLOAT32", "{3, 2}"), - equation = Parameter("eq", "TENSOR_QUANT8_ASYMM", "{9}, 1.0, 0", - [105, 107, 44, 107, 106, 45, 62, 105, 106]), # ik,kj->ij - lhs_data=[0., 1., 2., 3., 4., 5.], - rhs_data=[0., 3., 1., 4., 2., 5.], - output=Output("output0", "TENSOR_FLOAT32", "{2, 2}"), - output_data=[5., 14., 14., 50.] 
-) - -# abc,cde->abde -dim_a = 2 -dim_b = 2 -dim_c = 4 -dim_d = 3 -dim_e = 3 - -lhs_value = [x for x in range(dim_a * dim_b * dim_c)] -rhs_value = [x for x in range(dim_c * dim_d * dim_e)] -result_value = [0 for x in range(dim_a * dim_b * dim_d * dim_e)] - -for a in range(dim_a): - for b in range(dim_b): - for d in range(dim_d): - for e in range(dim_e): - result_index = e + dim_e * (d + dim_d * (b + dim_b * a)) - - for c in range(dim_c): - lhs_index = c + dim_c * (b + dim_b * a) - rhs_index = e + dim_e * (d + dim_d * c) - result_value[result_index] = result_value[result_index] + lhs_value[lhs_index] * rhs_value[rhs_index] - -test( - name = 'matmul_3x3_4', - lhs = Input("input0", "TENSOR_FLOAT32", "{2, 2, 4}"), - rhs = Input("input1", "TENSOR_FLOAT32", "{4, 3, 3}"), - equation = Parameter("eq", "TENSOR_QUANT8_ASYMM", "{13}, 1.0, 0", - [97, 98, 99, 44, 99, 100, 101, 45, 62, 97, 98, 100, 101]), # abc,cde->abde - lhs_data=lhs_value, - rhs_data=rhs_value, - output=Output("output0", "TENSOR_FLOAT32", "{2, 2, 3, 3}"), - output_data=result_value -) - -# abc,cd->abd -dim_a = 2 -dim_b = 3 -dim_c = 4 -dim_d = 3 - -lhs_value = [x for x in range(dim_a * dim_b * dim_c)] -rhs_value = [x for x in range(dim_c * dim_d)] -result_value = [0 for x in range(dim_a * dim_b * dim_d)] - -for a in range(dim_a): - for b in range(dim_b): - for d in range(dim_d): - result_index = d + dim_d * (b + dim_b * a) - - for c in range(dim_c): - lhs_index = c + dim_c * (b + dim_b * a) - rhs_index = d + dim_d * c - result_value[result_index] = result_value[result_index] + lhs_value[lhs_index] * rhs_value[rhs_index] - -test( - name = 'matmul_3x2_3', - lhs = Input("input0", "TENSOR_FLOAT32", "{2, 3, 4}"), - rhs = Input("input1", "TENSOR_FLOAT32", "{4, 3}"), - equation = Parameter("eq", "TENSOR_QUANT8_ASYMM", "{11}, 1.0, 0", - [97, 98, 99, 44, 99, 100, 45, 62, 97, 98, 100]), # abc,cd->abd - lhs_data=lhs_value, - rhs_data=rhs_value, - output=Output("output0", "TENSOR_FLOAT32", "{2, 3, 3}"), - 
output_data=result_value -) - - -# abcd,adbe->acbe -dim_a = 2 -dim_b = 3 -dim_c = 4 -dim_d = 2 -dim_e = 4 - -lhs_value = [x for x in range(dim_a * dim_b * dim_c * dim_d)] -rhs_value = [x for x in range(dim_a * dim_d * dim_b * dim_e)] -result_value = [0 for x in range(dim_a * dim_c * dim_b * dim_e)] - -for a in range(dim_a): - for c in range(dim_c): - for b in range(dim_b): - for e in range(dim_e): - result_index = e + dim_e * (b + dim_b * (c + dim_c * a)) - - for d in range(dim_d): - lhs_index = d + dim_d * (c + dim_c * (b + dim_b * a)) - rhs_index = e + dim_e * (b + dim_b * (d + dim_d * a)) - result_value[result_index] = result_value[result_index] + lhs_value[lhs_index] * rhs_value[rhs_index] - -test( - name = 'matmul_4x4_4', - lhs = Input("input0", "TENSOR_FLOAT32", "{2, 3, 4, 2}"), - rhs = Input("input1", "TENSOR_FLOAT32", "{2, 2, 3, 4}"), - equation = Parameter("eq", "TENSOR_QUANT8_ASYMM", "{15}, 1.0, 0", - [97, 98, 99, 100, 44, 97, 100, 98, 101, 45, 62, 97, 99, 98, 101]), # abcd,adbe->acbe - lhs_data=lhs_value, - rhs_data=rhs_value, - output=Output("output0", "TENSOR_FLOAT32", "{2, 4, 3, 4}"), - output_data=result_value -) - -# abcd,aecd->aceb -dim_a = 2 -dim_b = 3 -dim_c = 2 -dim_d = 4 -dim_e = 3 - -lhs_value = [x for x in range(dim_a * dim_b * dim_c * dim_d)] -rhs_value = [x for x in range(dim_a * dim_e * dim_c * dim_d)] -result_value = [0 for x in range(dim_a * dim_c * dim_e * dim_b)] - -for a in range(dim_a): - for c in range(dim_c): - for b in range(dim_e): - for e in range(dim_b): - result_index = b + dim_b * (e + dim_e * (c + dim_c * a)) - - for d in range(dim_d): - lhs_index = d + dim_d * (c + dim_c * (b + dim_b * a)) - rhs_index = d + dim_d * (c + dim_c * (e + dim_e * a)) - result_value[result_index] = result_value[result_index] + lhs_value[lhs_index] * rhs_value[rhs_index] - -test( - name = 'matmul_4x4_4_2', - lhs = Input("input0", "TENSOR_FLOAT32", "{2, 3, 2, 4}"), - rhs = Input("input1", "TENSOR_FLOAT32", "{2, 3, 2, 4}"), - equation = 
Parameter("eq", "TENSOR_QUANT8_ASYMM", "{15}, 1.0, 0", - [97, 98, 99, 100, 44, 97, 101, 99, 100, 45, 62, 97, 99, 101, 98]), # abcd,aecd->aceb - lhs_data=lhs_value, - rhs_data=rhs_value, - output=Output("output0", "TENSOR_FLOAT32", "{2, 2, 3, 3}"), - output_data=result_value -)