From 1541736266c80bf44f51d212091a0ca2bd0eee27 Mon Sep 17 00:00:00 2001
From: HemanthSai7
Date: Fri, 17 Jan 2025 09:09:04 +0530
Subject: [PATCH 1/3] Add INT32 support to SUB

---
 tensorflow/lite/micro/kernels/sub.cc      | 89 ++++++++++++++++-------
 tensorflow/lite/micro/kernels/sub_test.cc | 33 +++++++++
 2 files changed, 96 insertions(+), 26 deletions(-)

diff --git a/tensorflow/lite/micro/kernels/sub.cc b/tensorflow/lite/micro/kernels/sub.cc
index 930bc0baaef..aae97b28021 100644
--- a/tensorflow/lite/micro/kernels/sub.cc
+++ b/tensorflow/lite/micro/kernels/sub.cc
@@ -36,31 +36,68 @@ void* SubInit(TfLiteContext* context, const char* buffer, size_t length) {
   return context->AllocatePersistentBuffer(context, sizeof(OpDataSub));
 }
 
-void EvalSub(TfLiteContext* context, TfLiteNode* node, TfLiteSubParams* params,
-             const OpDataSub* data, const TfLiteEvalTensor* input1,
-             const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
-  float output_activation_min, output_activation_max;
-  CalculateActivationRange(params->activation, &output_activation_min,
-                           &output_activation_max);
-  tflite::ArithmeticParams op_params;
-  SetActivationParams(output_activation_min, output_activation_max, &op_params);
-  if (data->requires_broadcast) {
-    tflite::reference_ops::BroadcastSubSlow(
-        op_params, tflite::micro::GetTensorShape(input1),
-        tflite::micro::GetTensorData<float>(input1),
-        tflite::micro::GetTensorShape(input2),
-        tflite::micro::GetTensorData<float>(input2),
-        tflite::micro::GetTensorShape(output),
-        tflite::micro::GetTensorData<float>(output));
-  } else {
-    tflite::reference_ops::SubWithActivation(
-        op_params, tflite::micro::GetTensorShape(input1),
-        tflite::micro::GetTensorData<float>(input1),
-        tflite::micro::GetTensorShape(input2),
-        tflite::micro::GetTensorData<float>(input2),
-        tflite::micro::GetTensorShape(output),
-        tflite::micro::GetTensorData<float>(output));
+TfLiteStatus EvalSub(TfLiteContext* context, TfLiteNode* node,
+                     TfLiteSubParams* params, const OpDataSub* data,
+                     const TfLiteEvalTensor* input1,
+                     const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
+  switch (output->type) {
+    case kTfLiteFloat32: {
+      float output_activation_min, output_activation_max;
+      CalculateActivationRange(params->activation, &output_activation_min,
+                               &output_activation_max);
+      tflite::ArithmeticParams op_params;
+      SetActivationParams(output_activation_min, output_activation_max,
+                          &op_params);
+      if (data->requires_broadcast) {
+        tflite::reference_ops::BroadcastSubSlow(
+            op_params, tflite::micro::GetTensorShape(input1),
+            tflite::micro::GetTensorData<float>(input1),
+            tflite::micro::GetTensorShape(input2),
+            tflite::micro::GetTensorData<float>(input2),
+            tflite::micro::GetTensorShape(output),
+            tflite::micro::GetTensorData<float>(output));
+      } else {
+        tflite::reference_ops::SubWithActivation(
+            op_params, tflite::micro::GetTensorShape(input1),
+            tflite::micro::GetTensorData<float>(input1),
+            tflite::micro::GetTensorShape(input2),
+            tflite::micro::GetTensorData<float>(input2),
+            tflite::micro::GetTensorShape(output),
+            tflite::micro::GetTensorData<float>(output));
+      }
+    } break;
+    case kTfLiteInt32: {
+      int32_t output_activation_min, output_activation_max;
+      CalculateActivationRange(params->activation, &output_activation_min,
+                               &output_activation_max);
+      tflite::ArithmeticParams op_params;
+      SetActivationParams(output_activation_min, output_activation_max,
+                          &op_params);
+      if (data->requires_broadcast) {
+        tflite::reference_ops::BroadcastSubSlow(
+            op_params, tflite::micro::GetTensorShape(input1),
+            tflite::micro::GetTensorData<int32_t>(input1),
+            tflite::micro::GetTensorShape(input2),
+            tflite::micro::GetTensorData<int32_t>(input2),
+            tflite::micro::GetTensorShape(output),
+            tflite::micro::GetTensorData<int32_t>(output));
+      } else {
+        tflite::reference_ops::SubWithActivation(
+            op_params, tflite::micro::GetTensorShape(input1),
+            tflite::micro::GetTensorData<int32_t>(input1),
+            tflite::micro::GetTensorShape(input2),
+            tflite::micro::GetTensorData<int32_t>(input2),
+            tflite::micro::GetTensorShape(output),
+            tflite::micro::GetTensorData<int32_t>(output));
+      }
+    } break;
+    default:
+      MicroPrintf("Type %s (%d) not supported.",
+                  TfLiteTypeGetName(output->type), output->type);
+      return kTfLiteError;
   }
+
+  return kTfLiteOk;
 }
 
 TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
@@ -68,7 +105,7 @@ TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
                               const TfLiteEvalTensor* input1,
                               const TfLiteEvalTensor* input2,
                               TfLiteEvalTensor* output) {
-  tflite::ArithmeticParams op_params;
+  tflite::ArithmeticParams op_params = {};
   op_params.left_shift = data->left_shift;
   op_params.input1_offset = data->input1_offset;
   op_params.input1_multiplier = data->input1_multiplier;
@@ -147,7 +184,7 @@ TfLiteStatus SubEval(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->user_data != nullptr);
   const OpDataSub& data = *(static_cast<const OpDataSub*>(node->user_data));
 
-  if (output->type == kTfLiteFloat32) {
+  if (output->type == kTfLiteFloat32 || output->type == kTfLiteInt32) {
     EvalSub(context, node, params, &data, input1, input2, output);
   } else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
     TF_LITE_ENSURE_OK(context, EvalSubQuantized(context, node, params, &data,
diff --git a/tensorflow/lite/micro/kernels/sub_test.cc b/tensorflow/lite/micro/kernels/sub_test.cc
index d5226ebcaae..302df42c1b8 100644
--- a/tensorflow/lite/micro/kernels/sub_test.cc
+++ b/tensorflow/lite/micro/kernels/sub_test.cc
@@ -105,6 +105,27 @@ void TestSubFloat(int* input1_dims_data, const float* input1_data,
                   ElementCount(*output_dims), activation);
 }
 
+void TestSubInt32(int* input1_dims_data, const int32_t* input1_data,
+                  int* input2_dims_data, const int32_t* input2_data,
+                  int* output_dims_data, const int32_t* expected_output,
+                  TfLiteFusedActivation activation, int32_t* output_data) {
+  TfLiteIntArray* input1_dims = IntArrayFromInts(input1_dims_data);
+  TfLiteIntArray* input2_dims = IntArrayFromInts(input2_dims_data);
+  TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data);
+
+  constexpr int inputs_size = 2;
+  constexpr int outputs_size = 1;
+  constexpr int tensors_size = inputs_size + outputs_size;
+  TfLiteTensor tensors[tensors_size] = {
+      CreateTensor(input1_data, input1_dims),
+      CreateTensor(input2_data, input2_dims),
+      CreateTensor(output_data, output_dims),
+  };
+
+  ValidateSubGoldens(tensors, tensors_size, expected_output, output_data,
+                     ElementCount(*output_dims), activation);
+}
+
 template <typename T>
 void TestSubQuantized(int* input1_dims_data, const float* input1_data,
                       T* input1_quantized, float input1_scale,
@@ -219,6 +240,18 @@ TF_LITE_MICRO_TEST(FloatSubWithScalarBroadcast) {
   }
 }
 
+TF_LITE_MICRO_TEST(Int32SubNoActivation) {
+  int inout_shape[] = {4, 1, 2, 2, 1};
+  const int32_t input1_values[] = {-2, 2147483646, -1, 1146622854};
+  const int32_t input2_values[] = {3, 1, -2147483647, -726978367};
+  const int32_t golden_values[] = {-5, 2147483645, 2147483646, 1873601221};
+  const int kOutputDimsCount = 4;
+  int32_t output_data[kOutputDimsCount];
+  tflite::testing::TestSubInt32(inout_shape, input1_values, inout_shape,
+                                input2_values, inout_shape, golden_values,
+                                kTfLiteActNone, output_data);
+}
+
 TF_LITE_MICRO_TEST(QuantizedSubNoActivationInt8) {
   const float scales[] = {0.25, 0.5, 1.0};
   const int zero_points[] = {-10, 4, 13};

From 80082a16bc643b785497190515f620effd816fd7 Mon Sep 17 00:00:00 2001
From: HemanthSai7
Date: Fri, 7 Mar 2025 12:00:41 +0530
Subject: [PATCH 2/3] Update SUB kernel and license year

---
 tensorflow/lite/micro/kernels/sub.cc        |  5 +++--
 tensorflow/lite/micro/kernels/sub_common.cc | 12 +++++++++++-
 tensorflow/lite/micro/kernels/sub_test.cc   |  4 +++-
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/tensorflow/lite/micro/kernels/sub.cc b/tensorflow/lite/micro/kernels/sub.cc
index aae97b28021..0df83cb4e53 100644
--- a/tensorflow/lite/micro/kernels/sub.cc
+++ b/tensorflow/lite/micro/kernels/sub.cc
@@ -1,4 +1,4 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2025 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -185,7 +185,8 @@ TfLiteStatus SubEval(TfLiteContext* context, TfLiteNode* node) {
   const OpDataSub& data = *(static_cast<const OpDataSub*>(node->user_data));
 
   if (output->type == kTfLiteFloat32 || output->type == kTfLiteInt32) {
-    EvalSub(context, node, params, &data, input1, input2, output);
+    TF_LITE_ENSURE_OK(
+        context, EvalSub(context, node, params, &data, input1, input2, output));
   } else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
     TF_LITE_ENSURE_OK(context, EvalSubQuantized(context, node, params, &data,
                                                 input1, input2, output));
diff --git a/tensorflow/lite/micro/kernels/sub_common.cc b/tensorflow/lite/micro/kernels/sub_common.cc
index d6647462faa..6637906ab6b 100644
--- a/tensorflow/lite/micro/kernels/sub_common.cc
+++ b/tensorflow/lite/micro/kernels/sub_common.cc
@@ -1,4 +1,4 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2025 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -98,6 +98,16 @@ TfLiteStatus SubPrepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_STATUS(
       CalculateOpDataSub(context, params, input1, input2, output, data));
 
+  if (output->type == kTfLiteInt32) {
+    // Only support INT32 unquantized SUB for now.
+    TF_LITE_ENSURE_EQ(context, input1->quantization.type,
+                      kTfLiteNoQuantization);
+    TF_LITE_ENSURE_EQ(context, input2->quantization.type,
+                      kTfLiteNoQuantization);
+    TF_LITE_ENSURE_EQ(context, output->quantization.type,
+                      kTfLiteNoQuantization);
+  }
+
   micro_context->DeallocateTempTfLiteTensor(input1);
   micro_context->DeallocateTempTfLiteTensor(input2);
   micro_context->DeallocateTempTfLiteTensor(output);
diff --git a/tensorflow/lite/micro/kernels/sub_test.cc b/tensorflow/lite/micro/kernels/sub_test.cc
index 302df42c1b8..d28a08980a9 100644
--- a/tensorflow/lite/micro/kernels/sub_test.cc
+++ b/tensorflow/lite/micro/kernels/sub_test.cc
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2025 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -240,6 +240,7 @@ TF_LITE_MICRO_TEST(FloatSubWithScalarBroadcast) {
   }
 }
 
+#if !defined(XTENSA)
 TF_LITE_MICRO_TEST(Int32SubNoActivation) {
   int inout_shape[] = {4, 1, 2, 2, 1};
   const int32_t input1_values[] = {-2, 2147483646, -1, 1146622854};
@@ -251,6 +252,7 @@ TF_LITE_MICRO_TEST(FloatSubWithScalarBroadcast) {
                                 input2_values, inout_shape, golden_values,
                                 kTfLiteActNone, output_data);
 }
+#endif
 
 TF_LITE_MICRO_TEST(QuantizedSubNoActivationInt8) {
   const float scales[] = {0.25, 0.5, 1.0};

From 5bc3e4d5b307cacf46e7a8e74f698e21a4ca7df7 Mon Sep 17 00:00:00 2001
From: HemanthSai7
Date: Sat, 8 Mar 2025 12:04:57 +0530
Subject: [PATCH 3/3] Guard TestSubInt32 with the XTENSA ifdef to pass tests

---
 tensorflow/lite/micro/kernels/sub_test.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/lite/micro/kernels/sub_test.cc b/tensorflow/lite/micro/kernels/sub_test.cc
index d28a08980a9..97732866a4a 100644
--- a/tensorflow/lite/micro/kernels/sub_test.cc
+++ b/tensorflow/lite/micro/kernels/sub_test.cc
@@ -105,6 +105,7 @@ void TestSubFloat(int* input1_dims_data, const float* input1_data,
                   ElementCount(*output_dims), activation);
 }
 
+#if !defined(XTENSA)
 void TestSubInt32(int* input1_dims_data, const int32_t* input1_data,
                   int* input2_dims_data, const int32_t* input2_data,
                   int* output_dims_data, const int32_t* expected_output,
@@ -125,6 +126,7 @@ void TestSubInt32(int* input1_dims_data, const int32_t* input1_data,
   ValidateSubGoldens(tensors, tensors_size, expected_output, output_data,
                      ElementCount(*output_dims), activation);
 }
+#endif
 
 template <typename T>
 void TestSubQuantized(int* input1_dims_data, const float* input1_data,
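
Reviewer sketch, not part of the patches: the series makes SUB handle
kTfLiteInt32 as plain, unquantized subtraction, rejects int32 tensors that
carry quantization parameters in SubPrepare, and compiles the new test out on
XTENSA targets, presumably because the optimized Xtensa kernel does not
implement this path. The minimal sketch below shows one way the new path
could be driven through the micro kernel test harness. It assumes the
KernelRunner API from tensorflow/lite/micro/kernels/kernel_runner.h,
Register_SUB() from tensorflow/lite/micro/kernels/micro_ops.h, and the
CreateTensor / IntArrayFromInts helpers from
tensorflow/lite/micro/test_helpers.h, so names and signatures should be
checked against the tree this series targets.

#include "tensorflow/lite/micro/kernels/kernel_runner.h"
#include "tensorflow/lite/micro/kernels/micro_ops.h"
#include "tensorflow/lite/micro/test_helpers.h"

// Runs one int32 SUB node: output[i] = input1[i] - input2[i].
TfLiteStatus CheckInt32Sub() {
  int dims_data[] = {1, 4};  // rank 1, four elements
  TfLiteIntArray* dims = tflite::testing::IntArrayFromInts(dims_data);

  const int32_t input1[] = {10, 0, -5, 2147483646};
  const int32_t input2[] = {3, 7, -5, -1};
  int32_t output[4];

  TfLiteTensor tensors[] = {
      tflite::testing::CreateTensor(input1, dims),
      tflite::testing::CreateTensor(input2, dims),
      tflite::testing::CreateTensor(output, dims),
  };

  // The node reads tensors 0 and 1 and writes tensor 2.
  int inputs_data[] = {2, 0, 1};
  int outputs_data[] = {1, 2};
  TfLiteIntArray* inputs = tflite::testing::IntArrayFromInts(inputs_data);
  TfLiteIntArray* outputs = tflite::testing::IntArrayFromInts(outputs_data);

  TfLiteSubParams params = {};         // assumed layout; zero-initialized
  params.activation = kTfLiteActNone;  // no fused activation

  const TFLMRegistration registration = tflite::Register_SUB();
  tflite::micro::KernelRunner runner(registration, tensors,
                                     /*tensors_size=*/3, inputs, outputs,
                                     &params);
  TF_LITE_ENSURE_STATUS(runner.InitAndPrepare());
  // Before this series, Invoke() would fail with "Type INT32 not supported".
  TF_LITE_ENSURE_STATUS(runner.Invoke());
  // output now holds {7, -7, 0, 2147483647}; 2147483646 - (-1) stays in range.
  return kTfLiteOk;
}

The bundled Int32SubNoActivation test covers the same behavior with golden
values; this sketch only illustrates the tensor and index-array plumbing the
harness expects.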