From 066652d02c19f6710e187537f1985c61341fe061 Mon Sep 17 00:00:00 2001 From: andresovela Date: Fri, 18 Oct 2024 14:49:42 +0200 Subject: [PATCH 01/11] Improve micro_profiler --- tensorflow/lite/micro/docs/profiling.md | 2 +- tensorflow/lite/micro/micro_profiler.cc | 123 ---------------- tensorflow/lite/micro/micro_profiler.h | 178 ++++++++++++++++++++---- tensorflow/lite/micro/micro_time.h | 8 ++ 4 files changed, 160 insertions(+), 151 deletions(-) delete mode 100644 tensorflow/lite/micro/micro_profiler.cc diff --git a/tensorflow/lite/micro/docs/profiling.md b/tensorflow/lite/micro/docs/profiling.md index 16c6b51917b..e76fb474fbe 100644 --- a/tensorflow/lite/micro/docs/profiling.md +++ b/tensorflow/lite/micro/docs/profiling.md @@ -26,7 +26,7 @@ from within operator kernels and other TFLite Micro routines. The MicroInterpreter class constructor contains an optional profiler argument. This profiler must be an instance of the tflite::Profiler class, and should implement the BeginEvent and EndEvent methods. There is a default implementation -in tensorflow/lite/micro/micro_profiler.cc which can be used for most purposes. +in tensorflow/lite/micro/micro_profiler.h which can be used for most purposes. The best practice for profiling across multiple invocations is to reset or call `ClearEvents()` in between invocations. diff --git a/tensorflow/lite/micro/micro_profiler.cc b/tensorflow/lite/micro/micro_profiler.cc deleted file mode 100644 index ebead51a90d..00000000000 --- a/tensorflow/lite/micro/micro_profiler.cc +++ /dev/null @@ -1,123 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/lite/micro/micro_profiler.h" - -#include -#include -#include - -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/micro/micro_log.h" -#include "tensorflow/lite/micro/micro_time.h" - -namespace tflite { - -uint32_t MicroProfiler::BeginEvent(const char* tag) { - if (num_events_ == kMaxEvents) { - MicroPrintf( - "MicroProfiler errored out because total number of events exceeded the " - "maximum of %d.", - kMaxEvents); - TFLITE_ASSERT_FALSE; - } - - tags_[num_events_] = tag; - start_ticks_[num_events_] = GetCurrentTimeTicks(); - end_ticks_[num_events_] = start_ticks_[num_events_] - 1; - return num_events_++; -} - -void MicroProfiler::EndEvent(uint32_t event_handle) { - TFLITE_DCHECK(event_handle < kMaxEvents); - end_ticks_[event_handle] = GetCurrentTimeTicks(); -} - -uint32_t MicroProfiler::GetTotalTicks() const { - int32_t ticks = 0; - for (int i = 0; i < num_events_; ++i) { - ticks += end_ticks_[i] - start_ticks_[i]; - } - return ticks; -} - -void MicroProfiler::Log() const { -#if !defined(TF_LITE_STRIP_ERROR_STRINGS) - for (int i = 0; i < num_events_; ++i) { - uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - MicroPrintf("%s took %u ticks (%d ms).", tags_[i], ticks, TicksToMs(ticks)); - } -#endif -} - -void MicroProfiler::LogCsv() const { -#if !defined(TF_LITE_STRIP_ERROR_STRINGS) - MicroPrintf("\"Event\",\"Tag\",\"Ticks\""); - for (int i = 0; i < num_events_; ++i) { -#if defined(HEXAGON) || defined(CMSIS_NN) - int ticks = end_ticks_[i] - start_ticks_[i]; - MicroPrintf("%d,%s,%d", i, tags_[i], ticks); -#else - uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - MicroPrintf("%d,%s,%" PRIu32, i, tags_[i], ticks); -#endif - } -#endif -} - -void MicroProfiler::LogTicksPerTagCsv() { -#if !defined(TF_LITE_STRIP_ERROR_STRINGS) - MicroPrintf( - "\"Unique Tag\",\"Total ticks across all events with that tag.\""); - int total_ticks = 0; - for (int i = 0; i < num_events_; ++i) { - uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - TFLITE_DCHECK(tags_[i] != nullptr); - int position = FindExistingOrNextPosition(tags_[i]); - TFLITE_DCHECK(position >= 0); - total_ticks_per_tag[position].tag = tags_[i]; - total_ticks_per_tag[position].ticks = - total_ticks_per_tag[position].ticks + ticks; - total_ticks += ticks; - } - - for (int i = 0; i < num_events_; ++i) { - TicksPerTag each_tag_entry = total_ticks_per_tag[i]; - if (each_tag_entry.tag == nullptr) { - break; - } - MicroPrintf("%s, %d", each_tag_entry.tag, each_tag_entry.ticks); - } - MicroPrintf("\"total number of ticks\", %d", total_ticks); -#endif -} - -// This method finds a particular array element in the total_ticks_per_tag array -// with the matching tag_name passed in the method. If it can find a -// matching array element that has the same tag_name, then it will return the -// position of the matching element. But if it unable to find a matching element -// with the given tag_name, it will return the next available empty position -// from the array. -int MicroProfiler::FindExistingOrNextPosition(const char* tag_name) { - int pos = 0; - for (; pos < num_events_; pos++) { - TicksPerTag each_tag_entry = total_ticks_per_tag[pos]; - if (each_tag_entry.tag == nullptr || - strcmp(each_tag_entry.tag, tag_name) == 0) { - return pos; - } - } - return pos < num_events_ ? pos : -1; -} -} // namespace tflite diff --git a/tensorflow/lite/micro/micro_profiler.h b/tensorflow/lite/micro/micro_profiler.h index b52ebcb4ea9..e602c4fd8a3 100644 --- a/tensorflow/lite/micro/micro_profiler.h +++ b/tensorflow/lite/micro/micro_profiler.h @@ -16,15 +16,28 @@ limitations under the License. #ifndef TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_ #define TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_ +#include +#include +#include + +#include "tensorflow/lite/kernels/internal/compatibility.h" +#include "tensorflow/lite/micro/micro_log.h" +#include "tensorflow/lite/micro/micro_time.h" #include "tensorflow/lite/micro/compatibility.h" #include "tensorflow/lite/micro/micro_profiler_interface.h" namespace tflite { +enum class MicroProfilerLogFormat { + HumanReadable, + Csv, +}; + // MicroProfiler creates a common way to gain fine-grained insight into runtime -// performance. Bottleck operators can be identified along with slow code +// performance. Bottleneck operators can be identified along with slow code // sections. This can be used in conjunction with running the relevant micro // benchmark to evaluate end-to-end performance. +template class MicroProfiler : public MicroProfilerInterface { public: MicroProfiler() = default; @@ -33,59 +46,170 @@ class MicroProfiler : public MicroProfilerInterface { // Marks the start of a new event and returns an event handle that can be used // to mark the end of the event via EndEvent. The lifetime of the tag // parameter must exceed that of the MicroProfiler. - virtual uint32_t BeginEvent(const char* tag) override; + uint32_t BeginEvent(const char* tag) { + if (num_events_ == MAX_EVENTS) { + MicroPrintf( + "MicroProfiler errored out because total number of events exceeded the " + "maximum of %d.", + MAX_EVENTS); + TFLITE_ASSERT_FALSE; + } + + tags_[num_events_] = tag; + start_ticks_[num_events_] = GetCurrentTimeTicks(); + end_ticks_[num_events_] = start_ticks_[num_events_] - 1; + return num_events_++; + } // Marks the end of an event associated with event_handle. It is the // responsibility of the caller to ensure than EndEvent is called once and // only once per event_handle. // // If EndEvent is called more than once for the same event_handle, the last - // call will be used as the end of event marker. If EndEvent is called 0 times + // call will be used as the end of event marker.If EndEvent is called 0 times // for a particular event_handle, the duration of that event will be 0 ticks. - virtual void EndEvent(uint32_t event_handle) override; + void EndEvent(uint32_t event_handle) { + TFLITE_DCHECK(event_handle < MAX_EVENTS); + end_ticks_[event_handle] = GetCurrentTimeTicks(); + } // Clears all the events that have been currently profiled. - void ClearEvents() { num_events_ = 0; } + void ClearEvents() { + num_events_ = 0; + num_tag_groups_ = 0; + } // Returns the sum of the ticks taken across all the events. This number // is only meaningful if all of the events are disjoint (the end time of // event[i] <= start time of event[i+1]). - uint32_t GetTotalTicks() const; + uint32_t GetTotalTicks() const { + int32_t ticks = 0; + for (int i = 0; i < num_events_; ++i) { + ticks += end_ticks_[i] - start_ticks_[i]; + } + return ticks; + } // Prints the profiling information of each of the events in human readable // form. - void Log() const; + void Log(MicroProfilerLogFormat format) const { + #if !defined(TF_LITE_STRIP_ERROR_STRINGS) + switch (format) { + case MicroProfilerLogFormat::HumanReadable: + for (int i = 0; i < num_events_; ++i) { + uint32_t ticks = end_ticks_[i] - start_ticks_[i]; + uint64_t us = TicksToUs(ticks); + MicroPrintf("%s took %u.%u ms (%u ticks)", tags_[i], us / 1000, us % 1000, ticks); + } + break; - // Prints the profiling information of each of the events in CSV (Comma - // Separated Value) form. - void LogCsv() const; + case MicroProfilerLogFormat::Csv: + MicroPrintf("\"Event\",\"Tag\",\"Ms\",\"Ticks\""); + for (int i = 0; i < num_events_; ++i) { + #if defined(HEXAGON) || defined(CMSIS_NN) + int ticks = end_ticks_[i] - start_ticks_[i]; + MicroPrintf("%d,%s,%u,%d", i, tags_[i], TicksToMs(ticks), ticks); + #else + uint32_t ticks = end_ticks_[i] - start_ticks_[i]; + MicroPrintf("%d,%s,%" PRIu32 ",%" PRIu32, i, tags_[i], TicksToMs(ticks), ticks); + #endif + } + break; + } + #endif // !defined(TF_LITE_STRIP_ERROR_STRINGS) + } - // Prints total ticks for each unique tag in CSV format. - // Output will have one row for each unique tag along with the - // total ticks summed across all events with that particular tag. - void LogTicksPerTagCsv(); + // Prints the profiling information of each of the events in human readable + // form, grouped per tag, sorted by execution time. + void LogGrouped(MicroProfilerLogFormat format) { + #if !defined(TF_LITE_STRIP_ERROR_STRINGS) + for (int i = 0; i < num_events_; ++i) { + // Find if the tag already exists in uniqueTags + TagGroup& tag_group = GetTagGroup(tags_[i]); + + uint32_t ticks = end_ticks_[i] - start_ticks_[i]; + tag_group.tag = tags_[i]; + tag_group.ticks += ticks; + tag_group.tag_count++; + } + + SortTagGroups(); + + switch (format) { + case MicroProfilerLogFormat::HumanReadable: { + MicroPrintf("Cumulative event times:"); + MicroPrintf("%-8s %-32s %-12s %-12s", "Count", "Tag", "Ticks", "Time"); + uint64_t total_ticks = 0; + uint64_t us; + for (int i = 0; i < num_tag_groups_; ++i) { + total_ticks += tag_groups_[i].ticks; + us = TicksToUs(tag_groups_[i].ticks); + MicroPrintf("%-8d %-32s %-12d %u.%03u ms", tag_groups_[i].tag_count, tag_groups_[i].tag, tag_groups_[i].ticks, us / 1000, us % 1000); + } + us = TicksToUs(total_ticks); + MicroPrintf("\nTotal time - %-10lld %u.%03u ms", total_ticks, us / 1000, us % 1000); + break; + } + case MicroProfilerLogFormat::Csv: { + MicroPrintf( + "\"Tag\",\"Total ms\",\"Total ticks\""); + for (int i = 0; i < num_tag_groups_; ++i) { + MicroPrintf("%s, %u, %u", tag_groups_[i].tag, TicksToMs(tag_groups_[i].ticks), tag_groups_[i].ticks); + } + break; + } + } + #endif // !defined(TF_LITE_STRIP_ERROR_STRINGS) + } private: - // Maximum number of events that this class can keep track of. The - // MicroProfiler will abort if AddEvent is called more than kMaxEvents number - // of times. Increase this number if you need more events. - static constexpr int kMaxEvents = 4096; - - const char* tags_[kMaxEvents]; - uint32_t start_ticks_[kMaxEvents]; - uint32_t end_ticks_[kMaxEvents]; + const char* tags_[MAX_EVENTS]; + uint32_t start_ticks_[MAX_EVENTS]; + uint32_t end_ticks_[MAX_EVENTS]; int num_events_ = 0; + int num_tag_groups_ = 0; - struct TicksPerTag { + struct TagGroup { const char* tag; uint32_t ticks; + uint32_t tag_count; }; + // In practice, the number of tags will be much lower than the number of // events. But it is theoretically possible that each event to be unique and - // hence we allow total_ticks_per_tag to have kMaxEvents entries. - TicksPerTag total_ticks_per_tag[kMaxEvents] = {}; + // hence we allow total_ticks_per_tag to have MAX_EVENTS entries. + TagGroup tag_groups_[MAX_EVENTS] = {}; + + // Helper function to find the index of a tag in the cumulative array + TagGroup& GetTagGroup(const char *tag) { + for (int i = 0; i < num_tag_groups_; ++i) { + if (strcmp(tag_groups_[i].tag, tag) == 0) { + return tag_groups_[i]; + } + } + + // Tag not found, so we create a new entry + // There should always be space since the array of tag groups + // is just as big as the array of events + tag_groups_[num_tag_groups_].tag = tag; + tag_groups_[num_tag_groups_].ticks = 0; + tag_groups_[num_tag_groups_].tag_count = 0; + return tag_groups_[num_tag_groups_++]; + } - int FindExistingOrNextPosition(const char* tag_name); + // Helper function to sort the tag groups by ticks in descending order + // Simple bubble sort implementation + void SortTagGroups() { + for (int i = 0; i < num_tag_groups_ - 1; ++i) { + for (int j = i + 1; j < num_tag_groups_; ++j) { + if (tag_groups_[j].ticks > tag_groups_[i].ticks) { + TagGroup temp_tag_group = tag_groups_[i]; + tag_groups_[i] = tag_groups_[j]; + tag_groups_[j] = temp_tag_group; + } + } + } + } TF_LITE_REMOVE_VIRTUAL_DELETE }; @@ -93,7 +217,7 @@ class MicroProfiler : public MicroProfilerInterface { #if defined(TF_LITE_STRIP_ERROR_STRINGS) // For release builds, the ScopedMicroProfiler is a noop. // -// This is done because the ScipedProfiler is used as part of the +// This is done because the ScopedProfiler is used as part of the // MicroInterpreter and we want to ensure zero overhead for the release builds. class ScopedMicroProfiler { public: diff --git a/tensorflow/lite/micro/micro_time.h b/tensorflow/lite/micro/micro_time.h index 5e8ad11519a..bb16d3c20f2 100644 --- a/tensorflow/lite/micro/micro_time.h +++ b/tensorflow/lite/micro/micro_time.h @@ -34,6 +34,14 @@ inline uint32_t TicksToMs(int32_t ticks) { static_cast(_ticks_per_second)); } +inline uint64_t TicksToUs(int32_t ticks) { + uint64_t _ticks_per_second = ticks_per_second(); + _ticks_per_second = + _ticks_per_second > 0 ? _ticks_per_second : 1; // zero divide prevention + return static_cast(1000000.0f * static_cast(ticks) / + static_cast(_ticks_per_second)); +} + } // namespace tflite #endif // TENSORFLOW_LITE_MICRO_MICRO_TIME_H_ From adac799775f100080470ccc3ee50580fb86bf128 Mon Sep 17 00:00:00 2001 From: andresovela Date: Fri, 18 Oct 2024 14:51:23 +0200 Subject: [PATCH 02/11] Fix formatting --- tensorflow/lite/micro/micro_profiler.h | 145 +++++++++++++------------ 1 file changed, 75 insertions(+), 70 deletions(-) diff --git a/tensorflow/lite/micro/micro_profiler.h b/tensorflow/lite/micro/micro_profiler.h index e602c4fd8a3..45cafcf41ba 100644 --- a/tensorflow/lite/micro/micro_profiler.h +++ b/tensorflow/lite/micro/micro_profiler.h @@ -21,10 +21,10 @@ limitations under the License. #include #include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/micro/micro_log.h" -#include "tensorflow/lite/micro/micro_time.h" #include "tensorflow/lite/micro/compatibility.h" +#include "tensorflow/lite/micro/micro_log.h" #include "tensorflow/lite/micro/micro_profiler_interface.h" +#include "tensorflow/lite/micro/micro_time.h" namespace tflite { @@ -49,8 +49,8 @@ class MicroProfiler : public MicroProfilerInterface { uint32_t BeginEvent(const char* tag) { if (num_events_ == MAX_EVENTS) { MicroPrintf( - "MicroProfiler errored out because total number of events exceeded the " - "maximum of %d.", + "MicroProfiler errored out because total number of events exceeded " + "the maximum of %d.", MAX_EVENTS); TFLITE_ASSERT_FALSE; } @@ -74,7 +74,7 @@ class MicroProfiler : public MicroProfilerInterface { } // Clears all the events that have been currently profiled. - void ClearEvents() { + void ClearEvents() { num_events_ = 0; num_tag_groups_ = 0; } @@ -83,83 +83,88 @@ class MicroProfiler : public MicroProfilerInterface { // is only meaningful if all of the events are disjoint (the end time of // event[i] <= start time of event[i+1]). uint32_t GetTotalTicks() const { - int32_t ticks = 0; - for (int i = 0; i < num_events_; ++i) { - ticks += end_ticks_[i] - start_ticks_[i]; - } - return ticks; + int32_t ticks = 0; + for (int i = 0; i < num_events_; ++i) { + ticks += end_ticks_[i] - start_ticks_[i]; + } + return ticks; } // Prints the profiling information of each of the events in human readable // form. void Log(MicroProfilerLogFormat format) const { - #if !defined(TF_LITE_STRIP_ERROR_STRINGS) - switch (format) { - case MicroProfilerLogFormat::HumanReadable: - for (int i = 0; i < num_events_; ++i) { - uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - uint64_t us = TicksToUs(ticks); - MicroPrintf("%s took %u.%u ms (%u ticks)", tags_[i], us / 1000, us % 1000, ticks); - } - break; - - case MicroProfilerLogFormat::Csv: - MicroPrintf("\"Event\",\"Tag\",\"Ms\",\"Ticks\""); - for (int i = 0; i < num_events_; ++i) { - #if defined(HEXAGON) || defined(CMSIS_NN) - int ticks = end_ticks_[i] - start_ticks_[i]; - MicroPrintf("%d,%s,%u,%d", i, tags_[i], TicksToMs(ticks), ticks); - #else - uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - MicroPrintf("%d,%s,%" PRIu32 ",%" PRIu32, i, tags_[i], TicksToMs(ticks), ticks); - #endif - } - break; - } - #endif // !defined(TF_LITE_STRIP_ERROR_STRINGS) +#if !defined(TF_LITE_STRIP_ERROR_STRINGS) + switch (format) { + case MicroProfilerLogFormat::HumanReadable: + for (int i = 0; i < num_events_; ++i) { + uint32_t ticks = end_ticks_[i] - start_ticks_[i]; + uint64_t us = TicksToUs(ticks); + MicroPrintf("%s took %u.%u ms (%u ticks)", tags_[i], us / 1000, + us % 1000, ticks); + } + break; + + case MicroProfilerLogFormat::Csv: + MicroPrintf("\"Event\",\"Tag\",\"Ms\",\"Ticks\""); + for (int i = 0; i < num_events_; ++i) { +#if defined(HEXAGON) || defined(CMSIS_NN) + int ticks = end_ticks_[i] - start_ticks_[i]; + MicroPrintf("%d,%s,%u,%d", i, tags_[i], TicksToMs(ticks), ticks); +#else + uint32_t ticks = end_ticks_[i] - start_ticks_[i]; + MicroPrintf("%d,%s,%" PRIu32 ",%" PRIu32, i, tags_[i], + TicksToMs(ticks), ticks); +#endif + } + break; + } +#endif // !defined(TF_LITE_STRIP_ERROR_STRINGS) } // Prints the profiling information of each of the events in human readable // form, grouped per tag, sorted by execution time. void LogGrouped(MicroProfilerLogFormat format) { - #if !defined(TF_LITE_STRIP_ERROR_STRINGS) - for (int i = 0; i < num_events_; ++i) { - // Find if the tag already exists in uniqueTags - TagGroup& tag_group = GetTagGroup(tags_[i]); - - uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - tag_group.tag = tags_[i]; - tag_group.ticks += ticks; - tag_group.tag_count++; - } +#if !defined(TF_LITE_STRIP_ERROR_STRINGS) + for (int i = 0; i < num_events_; ++i) { + // Find if the tag already exists in uniqueTags + TagGroup& tag_group = GetTagGroup(tags_[i]); + + uint32_t ticks = end_ticks_[i] - start_ticks_[i]; + tag_group.tag = tags_[i]; + tag_group.ticks += ticks; + tag_group.tag_count++; + } - SortTagGroups(); - - switch (format) { - case MicroProfilerLogFormat::HumanReadable: { - MicroPrintf("Cumulative event times:"); - MicroPrintf("%-8s %-32s %-12s %-12s", "Count", "Tag", "Ticks", "Time"); - uint64_t total_ticks = 0; - uint64_t us; - for (int i = 0; i < num_tag_groups_; ++i) { - total_ticks += tag_groups_[i].ticks; - us = TicksToUs(tag_groups_[i].ticks); - MicroPrintf("%-8d %-32s %-12d %u.%03u ms", tag_groups_[i].tag_count, tag_groups_[i].tag, tag_groups_[i].ticks, us / 1000, us % 1000); - } - us = TicksToUs(total_ticks); - MicroPrintf("\nTotal time - %-10lld %u.%03u ms", total_ticks, us / 1000, us % 1000); - break; + SortTagGroups(); + + switch (format) { + case MicroProfilerLogFormat::HumanReadable: { + MicroPrintf("Cumulative event times:"); + MicroPrintf("%-8s %-32s %-12s %-12s", "Count", "Tag", "Ticks", "Time"); + uint64_t total_ticks = 0; + uint64_t us; + for (int i = 0; i < num_tag_groups_; ++i) { + total_ticks += tag_groups_[i].ticks; + us = TicksToUs(tag_groups_[i].ticks); + MicroPrintf("%-8d %-32s %-12d %u.%03u ms", tag_groups_[i].tag_count, + tag_groups_[i].tag, tag_groups_[i].ticks, us / 1000, + us % 1000); } - case MicroProfilerLogFormat::Csv: { - MicroPrintf( - "\"Tag\",\"Total ms\",\"Total ticks\""); - for (int i = 0; i < num_tag_groups_; ++i) { - MicroPrintf("%s, %u, %u", tag_groups_[i].tag, TicksToMs(tag_groups_[i].ticks), tag_groups_[i].ticks); - } - break; + us = TicksToUs(total_ticks); + MicroPrintf("\nTotal time - %-10lld %u.%03u ms", total_ticks, us / 1000, + us % 1000); + break; + } + case MicroProfilerLogFormat::Csv: { + MicroPrintf("\"Tag\",\"Total ms\",\"Total ticks\""); + for (int i = 0; i < num_tag_groups_; ++i) { + MicroPrintf("%s, %u, %u", tag_groups_[i].tag, + TicksToMs(tag_groups_[i].ticks), tag_groups_[i].ticks); } + break; } - #endif // !defined(TF_LITE_STRIP_ERROR_STRINGS) + } +#endif // !defined(TF_LITE_STRIP_ERROR_STRINGS) } private: @@ -181,10 +186,10 @@ class MicroProfiler : public MicroProfilerInterface { TagGroup tag_groups_[MAX_EVENTS] = {}; // Helper function to find the index of a tag in the cumulative array - TagGroup& GetTagGroup(const char *tag) { + TagGroup& GetTagGroup(const char* tag) { for (int i = 0; i < num_tag_groups_; ++i) { if (strcmp(tag_groups_[i].tag, tag) == 0) { - return tag_groups_[i]; + return tag_groups_[i]; } } From f2502916c39614fba1685740a4862f7a3dca8694 Mon Sep 17 00:00:00 2001 From: andresovela Date: Fri, 18 Oct 2024 17:46:36 +0200 Subject: [PATCH 03/11] Fixed formatting issues --- tensorflow/lite/micro/micro_profiler.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/lite/micro/micro_profiler.h b/tensorflow/lite/micro/micro_profiler.h index 45cafcf41ba..0a7d45ec3d6 100644 --- a/tensorflow/lite/micro/micro_profiler.h +++ b/tensorflow/lite/micro/micro_profiler.h @@ -146,13 +146,13 @@ class MicroProfiler : public MicroProfilerInterface { for (int i = 0; i < num_tag_groups_; ++i) { total_ticks += tag_groups_[i].ticks; us = TicksToUs(tag_groups_[i].ticks); - MicroPrintf("%-8d %-32s %-12d %u.%03u ms", tag_groups_[i].tag_count, - tag_groups_[i].tag, tag_groups_[i].ticks, us / 1000, - us % 1000); + MicroPrintf("%-8d %-32s %-12d %" PRIu64 ".%03" PRIu64 " ms", + tag_groups_[i].tag_count, tag_groups_[i].tag, + tag_groups_[i].ticks, us / 1000, us % 1000); } us = TicksToUs(total_ticks); - MicroPrintf("\nTotal time - %-10lld %u.%03u ms", total_ticks, us / 1000, - us % 1000); + MicroPrintf("\nTotal time: %" PRIu64 ".%03" PRIu64 " ms (%lld ticks)", + us / 1000, us % 1000, total_ticks); break; } case MicroProfilerLogFormat::Csv: { From 91c0fdf203e8579f1f3ba971e65f4a9085ba6779 Mon Sep 17 00:00:00 2001 From: andresovela Date: Fri, 18 Oct 2024 18:05:01 +0200 Subject: [PATCH 04/11] Add decimals to missing logs --- tensorflow/lite/micro/micro_profiler.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tensorflow/lite/micro/micro_profiler.h b/tensorflow/lite/micro/micro_profiler.h index 0a7d45ec3d6..c2236b01675 100644 --- a/tensorflow/lite/micro/micro_profiler.h +++ b/tensorflow/lite/micro/micro_profiler.h @@ -99,8 +99,8 @@ class MicroProfiler : public MicroProfilerInterface { for (int i = 0; i < num_events_; ++i) { uint32_t ticks = end_ticks_[i] - start_ticks_[i]; uint64_t us = TicksToUs(ticks); - MicroPrintf("%s took %u.%u ms (%u ticks)", tags_[i], us / 1000, - us % 1000, ticks); + MicroPrintf("%s took %" PRIu64 ".%" PRIu64 " ms (%u ticks)", tags_[i], + us / 1000, us % 1000, ticks); } break; @@ -112,8 +112,9 @@ class MicroProfiler : public MicroProfilerInterface { MicroPrintf("%d,%s,%u,%d", i, tags_[i], TicksToMs(ticks), ticks); #else uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - MicroPrintf("%d,%s,%" PRIu32 ",%" PRIu32, i, tags_[i], - TicksToMs(ticks), ticks); + uint64_t us = TicksToUs(ticks); + MicroPrintf("%d,%s,%" PRIu64 ".%" PRIu64 ",%" PRIu32, i, tags_[i], + us / 1000, us % 1000, ticks); #endif } break; @@ -158,8 +159,9 @@ class MicroProfiler : public MicroProfilerInterface { case MicroProfilerLogFormat::Csv: { MicroPrintf("\"Tag\",\"Total ms\",\"Total ticks\""); for (int i = 0; i < num_tag_groups_; ++i) { - MicroPrintf("%s, %u, %u", tag_groups_[i].tag, - TicksToMs(tag_groups_[i].ticks), tag_groups_[i].ticks); + uint64_t us = TicksToUs(tag_groups_[i].ticks); + MicroPrintf("%s, %" PRIu64 ".%" PRIu64 ", %u", tag_groups_[i].tag, + us / 1000, us % 1000, tag_groups_[i].ticks); } break; } From 8911d4c9d3b75556e31d6076118b82af7ab8187a Mon Sep 17 00:00:00 2001 From: andresovela Date: Mon, 21 Oct 2024 11:09:37 +0200 Subject: [PATCH 05/11] Address code review --- tensorflow/lite/micro/micro_profiler.h | 50 +++++++++++--------------- 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/tensorflow/lite/micro/micro_profiler.h b/tensorflow/lite/micro/micro_profiler.h index c2236b01675..1a96efc6dd3 100644 --- a/tensorflow/lite/micro/micro_profiler.h +++ b/tensorflow/lite/micro/micro_profiler.h @@ -16,6 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_ #define TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_ +#include #include #include #include @@ -37,7 +38,7 @@ enum class MicroProfilerLogFormat { // performance. Bottleneck operators can be identified along with slow code // sections. This can be used in conjunction with running the relevant micro // benchmark to evaluate end-to-end performance. -template +template class MicroProfiler : public MicroProfilerInterface { public: MicroProfiler() = default; @@ -46,7 +47,7 @@ class MicroProfiler : public MicroProfilerInterface { // Marks the start of a new event and returns an event handle that can be used // to mark the end of the event via EndEvent. The lifetime of the tag // parameter must exceed that of the MicroProfiler. - uint32_t BeginEvent(const char* tag) { + uint32_t BeginEvent(const char* tag) override { if (num_events_ == MAX_EVENTS) { MicroPrintf( "MicroProfiler errored out because total number of events exceeded " @@ -68,7 +69,7 @@ class MicroProfiler : public MicroProfilerInterface { // If EndEvent is called more than once for the same event_handle, the last // call will be used as the end of event marker.If EndEvent is called 0 times // for a particular event_handle, the duration of that event will be 0 ticks. - void EndEvent(uint32_t event_handle) { + void EndEvent(uint32_t event_handle) override { TFLITE_DCHECK(event_handle < MAX_EVENTS); end_ticks_[event_handle] = GetCurrentTimeTicks(); } @@ -90,9 +91,10 @@ class MicroProfiler : public MicroProfilerInterface { return ticks; } - // Prints the profiling information of each of the events in human readable - // form. - void Log(MicroProfilerLogFormat format) const { + // Prints the profiling information of each of the events with the + // given format (human readable by default). + void Log(MicroProfilerLogFormat format = + MicroProfilerLogFormat::HumanReadable) const { #if !defined(TF_LITE_STRIP_ERROR_STRINGS) switch (format) { case MicroProfilerLogFormat::HumanReadable: @@ -105,16 +107,14 @@ class MicroProfiler : public MicroProfilerInterface { break; case MicroProfilerLogFormat::Csv: - MicroPrintf("\"Event\",\"Tag\",\"Ms\",\"Ticks\""); + MicroPrintf("\"Event\",\"Tag\",\"Ticks\""); for (int i = 0; i < num_events_; ++i) { #if defined(HEXAGON) || defined(CMSIS_NN) int ticks = end_ticks_[i] - start_ticks_[i]; - MicroPrintf("%d,%s,%u,%d", i, tags_[i], TicksToMs(ticks), ticks); + MicroPrintf("%d,%s,%d", i, tags_[i], ticks); #else uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - uint64_t us = TicksToUs(ticks); - MicroPrintf("%d,%s,%" PRIu64 ".%" PRIu64 ",%" PRIu32, i, tags_[i], - us / 1000, us % 1000, ticks); + MicroPrintf("%d,%s,%" PRIu32, i, tags_[i], ticks); #endif } break; @@ -122,6 +122,9 @@ class MicroProfiler : public MicroProfilerInterface { #endif // !defined(TF_LITE_STRIP_ERROR_STRINGS) } + // Prints the profiling information of each of the events in CSV format. + void LogCsv() const { Log(MicroProfilerLogFormat::Csv); } + // Prints the profiling information of each of the events in human readable // form, grouped per tag, sorted by execution time. void LogGrouped(MicroProfilerLogFormat format) { @@ -136,7 +139,10 @@ class MicroProfiler : public MicroProfilerInterface { tag_group.tag_count++; } - SortTagGroups(); + std::sort(tag_groups_, tag_groups_ + num_tag_groups_, + [](const TagGroup& a, const TagGroup& b) { + return a.ticks > b.ticks; // Sort in descending order + }); switch (format) { case MicroProfilerLogFormat::HumanReadable: { @@ -157,11 +163,9 @@ class MicroProfiler : public MicroProfilerInterface { break; } case MicroProfilerLogFormat::Csv: { - MicroPrintf("\"Tag\",\"Total ms\",\"Total ticks\""); + MicroPrintf("\"Tag\",\"Total ticks\""); for (int i = 0; i < num_tag_groups_; ++i) { - uint64_t us = TicksToUs(tag_groups_[i].ticks); - MicroPrintf("%s, %" PRIu64 ".%" PRIu64 ", %u", tag_groups_[i].tag, - us / 1000, us % 1000, tag_groups_[i].ticks); + MicroPrintf("%s,%u", tag_groups_[i].tag, tag_groups_[i].ticks); } break; } @@ -204,20 +208,6 @@ class MicroProfiler : public MicroProfilerInterface { return tag_groups_[num_tag_groups_++]; } - // Helper function to sort the tag groups by ticks in descending order - // Simple bubble sort implementation - void SortTagGroups() { - for (int i = 0; i < num_tag_groups_ - 1; ++i) { - for (int j = i + 1; j < num_tag_groups_; ++j) { - if (tag_groups_[j].ticks > tag_groups_[i].ticks) { - TagGroup temp_tag_group = tag_groups_[i]; - tag_groups_[i] = tag_groups_[j]; - tag_groups_[j] = temp_tag_group; - } - } - } - } - TF_LITE_REMOVE_VIRTUAL_DELETE }; From ec67287b47dd490fb2fde7e83a8fbb07438adb03 Mon Sep 17 00:00:00 2001 From: andresovela Date: Mon, 21 Oct 2024 13:35:59 +0200 Subject: [PATCH 06/11] Add LogTicksPerTagCsv() back --- tensorflow/lite/micro/micro_profiler.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tensorflow/lite/micro/micro_profiler.h b/tensorflow/lite/micro/micro_profiler.h index 1a96efc6dd3..af91f71c823 100644 --- a/tensorflow/lite/micro/micro_profiler.h +++ b/tensorflow/lite/micro/micro_profiler.h @@ -122,11 +122,7 @@ class MicroProfiler : public MicroProfilerInterface { #endif // !defined(TF_LITE_STRIP_ERROR_STRINGS) } - // Prints the profiling information of each of the events in CSV format. - void LogCsv() const { Log(MicroProfilerLogFormat::Csv); } - - // Prints the profiling information of each of the events in human readable - // form, grouped per tag, sorted by execution time. + // Prints the profiling information of each of the events, grouped by tag. void LogGrouped(MicroProfilerLogFormat format) { #if !defined(TF_LITE_STRIP_ERROR_STRINGS) for (int i = 0; i < num_events_; ++i) { @@ -173,6 +169,12 @@ class MicroProfiler : public MicroProfilerInterface { #endif // !defined(TF_LITE_STRIP_ERROR_STRINGS) } + // Convenience function to call Log with CSV format. + void LogCsv() const { Log(MicroProfilerLogFormat::Csv); } + + // Convenience function to call LogGrouped with CSV format. + void LogTicksPerTagCsv() const { LogGrouped(MicroProfilerLogFormat::Csv); } + private: const char* tags_[MAX_EVENTS]; uint32_t start_ticks_[MAX_EVENTS]; From dce2a28c44888fffa1459a3d2dc706388e85344d Mon Sep 17 00:00:00 2001 From: andresovela Date: Mon, 28 Oct 2024 15:06:22 +0100 Subject: [PATCH 07/11] Add missing zero padding to log --- tensorflow/lite/micro/micro_profiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/micro/micro_profiler.h b/tensorflow/lite/micro/micro_profiler.h index af91f71c823..c44a53e61f1 100644 --- a/tensorflow/lite/micro/micro_profiler.h +++ b/tensorflow/lite/micro/micro_profiler.h @@ -101,7 +101,7 @@ class MicroProfiler : public MicroProfilerInterface { for (int i = 0; i < num_events_; ++i) { uint32_t ticks = end_ticks_[i] - start_ticks_[i]; uint64_t us = TicksToUs(ticks); - MicroPrintf("%s took %" PRIu64 ".%" PRIu64 " ms (%u ticks)", tags_[i], + MicroPrintf("%s took %" PRIu64 ".%03" PRIu64 " ms (%u ticks)", tags_[i], us / 1000, us % 1000, ticks); } break; From 9d3d6852eb627c4eff18f00fbb5fa94eb19af1f4 Mon Sep 17 00:00:00 2001 From: andresovela Date: Tue, 4 Mar 2025 14:03:01 +0100 Subject: [PATCH 08/11] Fix missing merge conflict --- tensorflow/lite/micro/micro_profiler.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tensorflow/lite/micro/micro_profiler.h b/tensorflow/lite/micro/micro_profiler.h index 4c0b64b0849..3eb9c686631 100644 --- a/tensorflow/lite/micro/micro_profiler.h +++ b/tensorflow/lite/micro/micro_profiler.h @@ -75,14 +75,7 @@ class MicroProfiler : public MicroProfilerInterface { } // Clears all the events that have been currently profiled. -<<<<<<< HEAD - void ClearEvents() { - num_events_ = 0; - num_tag_groups_ = 0; - } -======= void ClearEvents(); ->>>>>>> main // Returns the sum of the ticks taken across all the events. This number // is only meaningful if all of the events are disjoint (the end time of @@ -194,13 +187,8 @@ class MicroProfiler : public MicroProfilerInterface { // In practice, the number of tags will be much lower than the number of // events. But it is theoretically possible that each event to be unique and -<<<<<<< HEAD // hence we allow total_ticks_per_tag to have MAX_EVENTS entries. TagGroup tag_groups_[MAX_EVENTS] = {}; -======= - // hence we allow total_ticks_per_tag to have kMaxEvents entries. - TicksPerTag total_ticks_per_tag_[kMaxEvents] = {}; ->>>>>>> main // Helper function to find the index of a tag in the cumulative array TagGroup& GetTagGroup(const char* tag) { From 505b5f94b8e687929a438425d57008b774b2524e Mon Sep 17 00:00:00 2001 From: andresovela Date: Tue, 4 Mar 2025 14:25:06 +0100 Subject: [PATCH 09/11] Fix bad merge --- tensorflow/lite/micro/micro_profiler.cc | 132 ------------------------ tensorflow/lite/micro/micro_profiler.h | 5 +- 2 files changed, 4 insertions(+), 133 deletions(-) delete mode 100644 tensorflow/lite/micro/micro_profiler.cc diff --git a/tensorflow/lite/micro/micro_profiler.cc b/tensorflow/lite/micro/micro_profiler.cc deleted file mode 100644 index e349bf73668..00000000000 --- a/tensorflow/lite/micro/micro_profiler.cc +++ /dev/null @@ -1,132 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/lite/micro/micro_profiler.h" - -#include -#include -#include - -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/micro/micro_log.h" -#include "tensorflow/lite/micro/micro_time.h" - -namespace tflite { - -uint32_t MicroProfiler::BeginEvent(const char* tag) { - if (num_events_ == kMaxEvents) { - MicroPrintf( - "MicroProfiler errored out because total number of events exceeded the " - "maximum of %d.", - kMaxEvents); - TFLITE_ASSERT_FALSE; - } - - tags_[num_events_] = tag; - start_ticks_[num_events_] = GetCurrentTimeTicks(); - end_ticks_[num_events_] = start_ticks_[num_events_] - 1; - return num_events_++; -} - -void MicroProfiler::EndEvent(uint32_t event_handle) { - TFLITE_DCHECK(event_handle < kMaxEvents); - end_ticks_[event_handle] = GetCurrentTimeTicks(); -} - -uint32_t MicroProfiler::GetTotalTicks() const { - int32_t ticks = 0; - for (int i = 0; i < num_events_; ++i) { - ticks += end_ticks_[i] - start_ticks_[i]; - } - return ticks; -} - -void MicroProfiler::Log() const { -#if !defined(TF_LITE_STRIP_ERROR_STRINGS) - for (int i = 0; i < num_events_; ++i) { - uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - MicroPrintf("%s took %u ticks (%d ms).", tags_[i], ticks, TicksToMs(ticks)); - } -#endif -} - -void MicroProfiler::LogCsv() const { -#if !defined(TF_LITE_STRIP_ERROR_STRINGS) - MicroPrintf("\"Event\",\"Tag\",\"Ticks\""); - for (int i = 0; i < num_events_; ++i) { -#if defined(HEXAGON) || defined(CMSIS_NN) - int ticks = end_ticks_[i] - start_ticks_[i]; - MicroPrintf("%d,%s,%d", i, tags_[i], ticks); -#else - uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - MicroPrintf("%d,%s,%" PRIu32, i, tags_[i], ticks); -#endif - } -#endif -} - -void MicroProfiler::LogTicksPerTagCsv() { -#if !defined(TF_LITE_STRIP_ERROR_STRINGS) - MicroPrintf( - "\"Unique Tag\",\"Total ticks across all events with that tag.\""); - int total_ticks = 0; - for (int i = 0; i < num_events_; ++i) { - uint32_t ticks = end_ticks_[i] - start_ticks_[i]; - TFLITE_DCHECK(tags_[i] != nullptr); - int position = FindExistingOrNextPosition(tags_[i]); - TFLITE_DCHECK(position >= 0); - total_ticks_per_tag_[position].tag = tags_[i]; - total_ticks_per_tag_[position].ticks = - total_ticks_per_tag_[position].ticks + ticks; - total_ticks += ticks; - } - - for (int i = 0; i < num_events_; ++i) { - TicksPerTag each_tag_entry = total_ticks_per_tag_[i]; - if (each_tag_entry.tag == nullptr) { - break; - } - MicroPrintf("%s, %d", each_tag_entry.tag, each_tag_entry.ticks); - } - MicroPrintf("\"total number of ticks\", %d", total_ticks); -#endif -} - -// This method finds a particular array element in the total_ticks_per_tag array -// with the matching tag_name passed in the method. If it can find a -// matching array element that has the same tag_name, then it will return the -// position of the matching element. But if it unable to find a matching element -// with the given tag_name, it will return the next available empty position -// from the array. -int MicroProfiler::FindExistingOrNextPosition(const char* tag_name) { - int pos = 0; - for (; pos < num_events_; pos++) { - TicksPerTag each_tag_entry = total_ticks_per_tag_[pos]; - if (each_tag_entry.tag == nullptr || - strcmp(each_tag_entry.tag, tag_name) == 0) { - return pos; - } - } - return pos < num_events_ ? pos : -1; -} - -void MicroProfiler::ClearEvents() { - for (int i = 0; i < num_events_; i++) { - total_ticks_per_tag_[i].tag = nullptr; - } - - num_events_ = 0; -} - -} // namespace tflite diff --git a/tensorflow/lite/micro/micro_profiler.h b/tensorflow/lite/micro/micro_profiler.h index 3eb9c686631..448c0dd8096 100644 --- a/tensorflow/lite/micro/micro_profiler.h +++ b/tensorflow/lite/micro/micro_profiler.h @@ -75,7 +75,10 @@ class MicroProfiler : public MicroProfilerInterface { } // Clears all the events that have been currently profiled. - void ClearEvents(); + void ClearEvents() { + num_events_ = 0; + num_tag_groups_ = 0; + } // Returns the sum of the ticks taken across all the events. This number // is only meaningful if all of the events are disjoint (the end time of From d221a34862e541f6b78a26d79539e3615393485c Mon Sep 17 00:00:00 2001 From: andresovela Date: Tue, 4 Mar 2025 14:26:33 +0100 Subject: [PATCH 10/11] Update copyright --- tensorflow/lite/micro/micro_profiler.h | 2 +- tensorflow/lite/micro/micro_time.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/micro/micro_profiler.h b/tensorflow/lite/micro/micro_profiler.h index 448c0dd8096..43b677969b4 100644 --- a/tensorflow/lite/micro/micro_profiler.h +++ b/tensorflow/lite/micro/micro_profiler.h @@ -1,4 +1,4 @@ -/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2025 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/tensorflow/lite/micro/micro_time.h b/tensorflow/lite/micro/micro_time.h index bb16d3c20f2..5d2f0aa69d8 100644 --- a/tensorflow/lite/micro/micro_time.h +++ b/tensorflow/lite/micro/micro_time.h @@ -1,4 +1,4 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2025 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. From ac38f943f5c2a100f04d5d08b7770810e90d6e70 Mon Sep 17 00:00:00 2001 From: andresovela Date: Tue, 4 Mar 2025 14:29:35 +0100 Subject: [PATCH 11/11] Fix formatting --- tensorflow/lite/micro/micro_profiler.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/micro/micro_profiler.h b/tensorflow/lite/micro/micro_profiler.h index 43b677969b4..c9d77043295 100644 --- a/tensorflow/lite/micro/micro_profiler.h +++ b/tensorflow/lite/micro/micro_profiler.h @@ -101,8 +101,8 @@ class MicroProfiler : public MicroProfilerInterface { for (int i = 0; i < num_events_; ++i) { uint32_t ticks = end_ticks_[i] - start_ticks_[i]; uint64_t us = TicksToUs(ticks); - MicroPrintf("%s took %" PRIu64 ".%03" PRIu64 " ms (%u ticks)", tags_[i], - us / 1000, us % 1000, ticks); + MicroPrintf("%s took %" PRIu64 ".%03" PRIu64 " ms (%u ticks)", + tags_[i], us / 1000, us % 1000, ticks); } break;