Changes from all commits
89 commits
ffffc7d
Create directory to hold optimized RISC-V vector intrinsics implemen…
JaimeHW Apr 14, 2025
54e272a
Add placeholder file until we adapt the convolution implementation to…
JaimeHW Apr 14, 2025
863d5df
Annotations
numbers1234567 Apr 14, 2025
ca71870
Offset() and MultiplyByQuantizedMultiplier() references
numbers1234567 Apr 14, 2025
3b24e0c
Add vector intrinsics convolution implementation
JaimeHW Apr 14, 2025
1632652
Add empty makefile for building the vector intrinsics implementations
JaimeHW Apr 14, 2025
a98604f
Build and test our 2d convolution implementation with tflite micro
JaimeHW Apr 15, 2025
e173412
Fix formatting
JaimeHW Apr 15, 2025
d980b28
Fix padding logic
JaimeHW Apr 15, 2025
a749783
Build tflite with custom kernel
numbers1234567 Apr 15, 2025
3d6bbc1
Partially vectorized 2D convolution implementation that passes all co…
JaimeHW Apr 15, 2025
0601584
Remove duplicate makefile
JaimeHW Apr 15, 2025
c4c873b
Restore TFLM primary Makefile
JaimeHW Apr 15, 2025
e51f7c1
Use vwmacc for convolution channel accumulation
JaimeHW Apr 15, 2025
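Commit e51f7c1 names the core accumulation trick used throughout these kernels. As a minimal sketch of the idea, assuming the standard RVV C intrinsics from <riscv_vector.h> (illustrative only, not the PR's code; DotInt8RVV is a hypothetical name): widen the int8 operands to int16, then let vwmacc multiply and accumulate into int32 lanes.

#include <riscv_vector.h>
#include <stdint.h>

// Hypothetical helper: int8 dot product accumulated via vwmacc.
static int32_t DotInt8RVV(const int8_t* a, const int8_t* b, int32_t n) {
  size_t vlmax = __riscv_vsetvlmax_e32m4();
  vint32m4_t acc = __riscv_vmv_v_x_i32m4(0, vlmax);
  for (int32_t i = 0; i < n;) {
    size_t vl = __riscv_vsetvl_e8m1(n - i);
    vint8m1_t va = __riscv_vle8_v_i8m1(a + i, vl);
    vint8m1_t vb = __riscv_vle8_v_i8m1(b + i, vl);
    // Sign-extend int8 lanes to int16 so vwmacc can widen into int32.
    vint16m2_t wa = __riscv_vsext_vf2_i16m2(va, vl);
    vint16m2_t wb = __riscv_vsext_vf2_i16m2(vb, vl);
    // vwmacc: int16 x int16 products added into the int32 accumulator;
    // the _tu policy keeps tail lanes undisturbed on the final short strip.
    acc = __riscv_vwmacc_vv_i32m4_tu(acc, wa, wb, vl);
    i += vl;
  }
  // Horizontal reduction of the int32 accumulator lanes.
  vint32m1_t zero = __riscv_vmv_v_x_i32m1(0, 1);
  vint32m1_t sum = __riscv_vredsum_vs_i32m4_i32m1(acc, zero, vlmax);
  return __riscv_vmv_x_s_i32m1_i32(sum);
}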
228ae90
Add comments
JaimeHW Apr 15, 2025
4d7257f
Add MicroPrintf output to ConvPerChannelRVV
JaimeHW Apr 15, 2025
93cf8b2
Fix typo
JaimeHW Apr 15, 2025
8f4a7a4
MicroPrintf in base implementation
numbers1234567 Apr 16, 2025
5dc8182
Fix formatting
JaimeHW Apr 16, 2025
b09116c
Merge branch 'main' of https://github.com/Peanut-Microsystems/tflite-…
numbers1234567 Apr 18, 2025
d2ae070
Information on building w/ custom implementations
numbers1234567 Apr 18, 2025
0f5b187
Update PEANUT-README.md
pseudonam-gc Apr 18, 2025
06dc715
Vectorize out_x dimension
JaimeHW Apr 19, 2025
1d20d31
DepthwiseConvPerChannel
JaimeHW Apr 19, 2025
8ca51a2
Remove old includes
JaimeHW Apr 19, 2025
63700c5
Perform 64-bit operations with 32-bit vector intrinsics
JaimeHW Apr 19, 2025
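Commit 63700c5 describes emulating 64-bit arithmetic with 32-bit element widths, which matters when 64-bit vector lanes are unavailable or costly on the target. A minimal sketch under those assumptions (hypothetical names, not the PR's code): keep the accumulator as separate lo/hi 32-bit halves and propagate the carry with an unsigned compare plus a masked add.

#include <riscv_vector.h>
#include <stdint.h>

// Hypothetical helper: element-wise acc64[i] += addend[i], with the 64-bit
// accumulator stored as separate 32-bit lo/hi arrays.
void Accumulate64(uint32_t* acc_lo, int32_t* acc_hi, const int32_t* addend,
                  size_t n) {
  for (size_t i = 0; i < n;) {
    size_t vl = __riscv_vsetvl_e32m1(n - i);
    vuint32m1_t lo = __riscv_vle32_v_u32m1(acc_lo + i, vl);
    vint32m1_t hi = __riscv_vle32_v_i32m1(acc_hi + i, vl);
    vint32m1_t add = __riscv_vle32_v_i32m1(addend + i, vl);
    vuint32m1_t u = __riscv_vreinterpret_v_i32m1_u32m1(add);
    vuint32m1_t new_lo = __riscv_vadd_vv_u32m1(lo, u, vl);
    // Carry out of the low word: unsigned wraparound means new_lo < lo.
    vbool32_t carry = __riscv_vmsltu_vv_u32m1_b32(new_lo, lo, vl);
    // High word gets the addend's sign extension (0 or -1) plus the carry.
    vint32m1_t sign = __riscv_vsra_vx_i32m1(add, 31, vl);
    vint32m1_t new_hi = __riscv_vadd_vv_i32m1(hi, sign, vl);
    new_hi = __riscv_vadd_vx_i32m1_mu(carry, new_hi, new_hi, 1, vl);
    __riscv_vse32_v_u32m1(acc_lo + i, new_lo, vl);
    __riscv_vse32_v_i32m1(acc_hi + i, new_hi, vl);
    i += vl;
  }
}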
a8a51b0
Add comments
JaimeHW Apr 19, 2025
0a88c11
Change C-style casts to static_cast
JaimeHW Apr 19, 2025
a09fc3e
FullyConnectedRVV and FullyConnectedPerChannelRVV
JaimeHW Apr 20, 2025
5875dfa
Delete unused file copy
JaimeHW Apr 20, 2025
221f33d
Reimplement convolution dispatcher
JaimeHW Apr 20, 2025
54ba9fe
Depthwise convolution dispatcher
JaimeHW Apr 20, 2025
81d4da0
FullyConnected dispatcher
JaimeHW Apr 20, 2025
bcb2163
Testing and issues
numbers1234567 Apr 22, 2025
0070b60
Vectorized softmax
JaimeHW Apr 24, 2025
8d712e5
Update Makefile
JaimeHW Apr 24, 2025
5949255
Restore reference TFLM conv.cc
JaimeHW Apr 24, 2025
9e18349
Fix Softmax
JaimeHW Apr 24, 2025
f451a40
Fix incorrect nudge factor in Softmax
JaimeHW Apr 24, 2025
b427443
Update Softmax
JaimeHW Apr 24, 2025
ed5d6aa
Add comments to vectorized Softmax
JaimeHW Apr 24, 2025
20bcb64
Take number of iterations as command line argument
JaimeHW Oct 16, 2025
cd531e3
Set optimization level to O3
JaimeHW Oct 16, 2025
540c22f
Add slimmed down version of micro_speech_test.cc for benchmarking
JaimeHW Oct 18, 2025
3176970
Optimize FullyConnectedPerChannelRVV
JaimeHW Nov 6, 2025
2189498
Update .gitignore
JaimeHW Nov 6, 2025
6983e55
Update riscv32_vector makefile
JaimeHW Nov 6, 2025
9fb07cf
Add vector optimized 8-bit MaxPool kernel
JaimeHW Nov 14, 2025
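The MaxPool kernels added in 9fb07cf and 86503c9 reduce a pooling window with element-wise vector max. A minimal sketch of the 8-bit case, assuming the standard RVV intrinsics (illustrative names, not the PR's code):

#include <riscv_vector.h>
#include <stdint.h>

// Hypothetical helper: per-channel max over `num_rows` window rows.
void MaxPoolWindowInt8(const int8_t* const* window_rows, size_t num_rows,
                       size_t channels, int8_t* out) {
  for (size_t c = 0; c < channels;) {
    size_t vl = __riscv_vsetvl_e8m1(channels - c);
    vint8m1_t acc = __riscv_vmv_v_x_i8m1(INT8_MIN, vl);
    for (size_t r = 0; r < num_rows; ++r) {
      vint8m1_t v = __riscv_vle8_v_i8m1(window_rows[r] + c, vl);
      acc = __riscv_vmax_vv_i8m1(acc, v, vl);  // element-wise window max
    }
    __riscv_vse8_v_i8m1(out + c, acc, vl);
    c += vl;
  }
}

The 16-bit variant would be the same shape with e16 element widths and vint16m1_t types.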
86503c9
Add 16-bit vector optimized MaxPool kernel
JaimeHW Nov 14, 2025
c1d046f
Formatting
JaimeHW Nov 14, 2025
7bb21b1
Fix bug in FullyConnectedPerChannel and refactor requantization logic
JaimeHW Nov 14, 2025
2b58b5f
Change header guard
JaimeHW Nov 14, 2025
69d3f00
Reformat and fix bugs in SoftMax
JaimeHW Nov 16, 2025
081f34b
Initial vector optimized RFFT kernel
JaimeHW Nov 16, 2025
871cbc9
Full vector optimized RFFT and FFT kernels
JaimeHW Nov 16, 2025
31d59ca
Initial vector optimized FilterBank kernel
JaimeHW Nov 18, 2025
536315f
Update vector optimized FilterBank kernel
JaimeHW Nov 18, 2025
3c21233
Update vector optimized FilterBank kernel
JaimeHW Nov 18, 2025
64c8a18
Update micro_speech_test for more accurate benchmarking
JaimeHW Nov 18, 2025
4307f3c
Revert target makefile RISCV_CODE_MODEL
JaimeHW Nov 18, 2025
9ee7fef
New person_detection2.cc for benchmarking
JaimeHW Nov 18, 2025
7dea3b9
Format comments
JaimeHW Nov 18, 2025
4255b94
Update micro_speech_test2
JaimeHW Nov 18, 2025
065051d
FilterBank: Accumulate carries into a vector register using masked ad…
JaimeHW Nov 19, 2025
3affc81
Optimize RFFT by replacing gather/scatter with strided loads
JaimeHW Nov 19, 2025
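Commit 3affc81 swaps indexed gathers for strided loads. For interleaved complex data (re0, im0, re1, im1, …), a fixed byte stride lets vlse32 split the halves without building an index vector, and strided accesses are usually cheaper than gathers on simple in-order RVV cores. A minimal sketch (hypothetical names and types; the PR's RFFT kernel may well use 16-bit data):

#include <riscv_vector.h>
#include <stdint.h>

// Hypothetical helper: de-interleave complex int32 data with strided loads.
void DeinterleaveComplex(const int32_t* buf, size_t n_complex,
                         int32_t* re_out, int32_t* im_out) {
  const ptrdiff_t stride = 2 * sizeof(int32_t);  // skip every other element
  for (size_t i = 0; i < n_complex;) {
    size_t vl = __riscv_vsetvl_e32m2(n_complex - i);
    vint32m2_t re = __riscv_vlse32_v_i32m2(buf + 2 * i, stride, vl);
    vint32m2_t im = __riscv_vlse32_v_i32m2(buf + 2 * i + 1, stride, vl);
    __riscv_vse32_v_i32m2(re_out + i, re, vl);
    __riscv_vse32_v_i32m2(im_out + i, im, vl);
    i += vl;
  }
}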
6ae426d
SoftMax: Fix vector-vector merge intrinsic usage and fix 64-bit emula…
JaimeHW Nov 19, 2025
bbe6a3e
Vector optimized FilterBankLog kernel
JaimeHW Nov 19, 2025
2190e8a
Optimize FilterbankLogRVV with branchless normalization and fix LUT g…
JaimeHW Nov 19, 2025
007ce6e
Remove redundant included headers
JaimeHW Nov 19, 2025
0633133
Optimize FilterbankLogRVV kernel using widening multiply and arithmet…
JaimeHW Nov 19, 2025
06dd4c8
FilterbankLogRVV: Use widening instructions and fix signed scaling logic
JaimeHW Nov 19, 2025
e3298e3
Update FilterBankLogRVV
JaimeHW Nov 19, 2025
0a69b8d
Update FilterBankLogRVV
JaimeHW Nov 19, 2025
41bdbd8
Fix register spilling in FilterBank
JaimeHW Nov 21, 2025
7592a96
Optimize register usage for convolution and fullyconnected kernels
JaimeHW Nov 21, 2025
c79cb32
Softmax: Optimize register usage
JaimeHW Nov 21, 2025
e694a1d
RFFT: Switch to LMUL=2 to reduce register pressure
JaimeHW Nov 21, 2025
ca9555f
Cleanup
JaimeHW Jan 6, 2026
5c90df1
Merge branch 'tensorflow:main' into main
JaimeHW Jan 6, 2026
03bb47e
Cleanup headers
JaimeHW Jan 6, 2026
af5cd5a
Merge remote-tracking branch 'refs/remotes/origin/main'
JaimeHW Jan 6, 2026
5236854
Update .gitignore
JaimeHW Jan 6, 2026
e6338a2
Remove PEANUT-README.md
JaimeHW Jan 6, 2026
0be2e65
Restore person detections main.cc
JaimeHW Jan 6, 2026
6d71d64
Add new line EOF
JaimeHW Jan 6, 2026
a73a8ec
Restore end-of-file new lines
JaimeHW Jan 6, 2026
0cfa2f2
Remove new line at end of micro speech Makefile
JaimeHW Jan 6, 2026
b5820fa
Restore
JaimeHW Jan 6, 2026
b079dca
Remove
JaimeHW Jan 6, 2026
1 change: 1 addition & 0 deletions .gitignore
@@ -5,6 +5,7 @@
 *audio_frontend*
 *google*
 *__pycache__*
+.venv
 venv
 gen

2 changes: 1 addition & 1 deletion tensorflow/lite/micro/examples/micro_speech/Makefile.inc
@@ -61,4 +61,4 @@ list_micro_speech_example_sources:
 	@echo $(MICRO_SPEECH_SRCS)

 list_micro_speech_example_headers:
-	@echo $(MICRO_SPEECH_HDRS)
+	@echo $(MICRO_SPEECH_HDRS)
\ No newline at end of file
199 changes: 199 additions & 0 deletions tensorflow/lite/micro/kernels/riscv_vector/conv.cc
@@ -0,0 +1,199 @@
/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/micro/kernels/conv.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h"
#include "tensorflow/lite/kernels/internal/reference/conv.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_log.h"

#include "tensorflow/lite/micro/kernels/riscv_vector/conv_rvv.h"

namespace tflite {
namespace {

TfLiteStatus ConvEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kConvInputTensor);
const TfLiteEvalTensor* filter =
tflite::micro::GetEvalInput(context, node, kConvWeightsTensor);
const TfLiteEvalTensor* bias =
(NumInputs(node) == 3)
? tflite::micro::GetEvalInput(context, node, kConvBiasTensor)
: nullptr;
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kConvOutputTensor);

TFLITE_DCHECK(node->builtin_data != nullptr);
const auto& params =
*(reinterpret_cast<TfLiteConvParams*>(node->builtin_data));
TFLITE_DCHECK(node->user_data != nullptr);
const auto& data = *(static_cast<const OpDataConv*>(node->user_data));

#ifdef USE_TFLM_COMPRESSION

MicroContext* micro_context = GetMicroContext(context);

const CompressionTensorData* weights_comp_td =
micro_context->GetTensorCompressionData(node, kConvWeightsTensor);
const CompressionTensorData* bias_comp_td =
micro_context->GetTensorCompressionData(node, kConvBiasTensor);

#endif // USE_TFLM_COMPRESSION

switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32: {
tflite::reference_ops::Conv(
ConvParamsFloat(params, data), tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
#ifdef USE_TFLM_COMPRESSION
tflite::micro::GetTensorData<float>(micro_context, filter,
weights_comp_td,
data.weights_scratch_index),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetOptionalTensorData<float>(
micro_context, bias, bias_comp_td, data.bias_scratch_index),
#else // USE_TFLM_COMPRESSION
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetOptionalTensorData<float>(bias),
#endif // USE_TFLM_COMPRESSION
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output),
tflite::micro::GetTensorShape(nullptr), nullptr);
break;
}
case kTfLiteInt16: {
if (bias == nullptr || bias->type == kTfLiteInt32) {
reference_integer_ops::ConvPerChannel(
ConvParamsQuantized(params, data),
data.per_channel_output_multiplier, data.per_channel_output_shift,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(filter),
#ifdef USE_TFLM_COMPRESSION
tflite::micro::GetTensorData<int8_t>(micro_context, filter,
weights_comp_td,
data.weights_scratch_index),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetOptionalTensorData<int32_t>(
micro_context, bias, bias_comp_td, data.bias_scratch_index),
#else // USE_TFLM_COMPRESSION
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetOptionalTensorData<std::int32_t>(bias),
#endif // USE_TFLM_COMPRESSION
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
} else if (bias->type == kTfLiteInt64) {
reference_integer_ops::ConvPerChannel(
ConvParamsQuantized(params, data),
data.per_channel_output_multiplier, data.per_channel_output_shift,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(filter),
#ifdef USE_TFLM_COMPRESSION
tflite::micro::GetTensorData<int8_t>(micro_context, filter,
weights_comp_td,
data.weights_scratch_index),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int64_t>(
micro_context, bias, bias_comp_td, data.bias_scratch_index),
#else // USE_TFLM_COMPRESSION
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<std::int64_t>(bias),
#endif // USE_TFLM_COMPRESSION
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
} else {
MicroPrintf("Bias type %s (%d) not supported.",
TfLiteTypeGetName(bias->type), bias->type);
return kTfLiteError;
}
break;
}
case kTfLiteInt8: {
switch (filter->type) {
case kTfLiteInt4: {
int8_t* unpacked_filter_data = static_cast<int8_t*>(
context->GetScratchBuffer(context, data.filter_buffer_index));
tflite::tensor_utils::UnpackDenseInt4IntoInt8(
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(filter).FlatSize(),
unpacked_filter_data);
reference_integer_ops::ConvPerChannel(
ConvParamsQuantized(params, data),
data.per_channel_output_multiplier, data.per_channel_output_shift,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter), unpacked_filter_data,
tflite::micro::GetTensorShape(bias),
tflite::micro::GetOptionalTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
}
case kTfLiteInt8: {
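        // Int8 filter with int8 input: dispatch to the RVV vector-optimized
        // kernel (ConvPerChannelRVV) instead of the reference ConvPerChannel.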
ConvPerChannelRVV(
ConvParamsQuantized(params, data),
data.per_channel_output_multiplier, data.per_channel_output_shift,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
#ifdef USE_TFLM_COMPRESSION
tflite::micro::GetTensorData<int8_t>(micro_context, filter,
weights_comp_td,
data.weights_scratch_index),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetOptionalTensorData<int32_t>(
micro_context, bias, bias_comp_td, data.bias_scratch_index),
#else // USE_TFLM_COMPRESSION
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetOptionalTensorData<int32_t>(bias),
#endif // USE_TFLM_COMPRESSION
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
}
default:
MicroPrintf("Weight type %s (%d) not supported.",
TfLiteTypeGetName(filter->type), filter->type);
return kTfLiteError;
}
break;
}
default:
MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
input->type);
return kTfLiteError;
}
return kTfLiteOk;
}

} // namespace

TFLMRegistration Register_CONV_2D() {
return tflite::micro::RegisterOp(ConvInit, ConvPrepare, ConvEval);
}

} // namespace tflite