Skip to content

Commit fdeefd6

Browse files
authored
Record min stack free, not max stack use (#22465)
### Ticket #19983 ### Problem description Moving to dynamic stack sizing means it is more useful to directly report the remaining free stack space. ### What's changed * Record remaining stack free, rather than size used. * Add stack size test cases * Implement '*' globing in the debug utils. * simplify `debug_stack_usage_t`. It was two arrays of uint16_t, now it is a single array of uint16_t pairs. There's a little excitement in that we need to record values 0 and up, but the buffer is also initialized to zero. We do offsets by 1 and unsigned arithmetic wraparound to avoid some comparisons. The stack sizes remain fixed, as before. ### Checklist - [YES] [All post commit](https://github.com/tenstorrent/tt-metal/actions/workflows/all-post-commit-workflows.yaml) CI passes - [YES] [Blackhole Post commit](https://github.com/tenstorrent/tt-metal/actions/workflows/blackhole-post-commit.yaml) CI with demo tests passes (if applicable) - [ ] [Model regression](https://github.com/tenstorrent/tt-metal/actions/workflows/perf-models.yaml) CI passes (if applicable) - [ ] [Device performance regression](https://github.com/tenstorrent/tt-metal/actions/workflows/perf-device-models.yaml) CI passes (if applicable) - [ ] (For models and ops writers) [Single-card demo tests](https://github.com/tenstorrent/tt-metal/actions/workflows/single-card-demo-tests.yaml) CI passes (if applicable) See [recommended dev flow](https://github.com/tenstorrent/tt-metal/blob/main/models/MODEL_ADD.md#a-recommended-dev-flow-on-github-for-adding-new-models). - [ ] New/Existing tests provide coverage for changes
1 parent 6c47279 commit fdeefd6

File tree

9 files changed

+292
-124
lines changed

9 files changed

+292
-124
lines changed

tests/tt_metal/tt_metal/debug_tools/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ set(UNIT_TESTS_DEBUG_TOOLS_SRC
1818
${CMAKE_CURRENT_SOURCE_DIR}/watcher/test_noc_sanitize.cpp
1919
${CMAKE_CURRENT_SOURCE_DIR}/watcher/test_pause.cpp
2020
${CMAKE_CURRENT_SOURCE_DIR}/watcher/test_ringbuf.cpp
21+
${CMAKE_CURRENT_SOURCE_DIR}/watcher/test_stack_usage.cpp
2122
${CMAKE_CURRENT_SOURCE_DIR}/watcher/test_waypoint.cpp
2223
)
2324

tests/tt_metal/tt_metal/debug_tools/debug_tools_test_utils.hpp

Lines changed: 77 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -34,42 +34,90 @@ inline void DumpFile(string file_name) {
3434
}
3535
}
3636

37-
// Wildcard is '?', just like a glob.
38-
inline bool StringCompareWithWildcard(const std::string_view str, const std::string_view pattern) {
39-
size_t pattern_size = pattern.size();
40-
if (pattern_size != str.size())
41-
return false;
42-
for (int idx = 0; idx < pattern_size; idx++) {
43-
if (str[idx] != pattern[idx] && pattern[idx] != '?') {
44-
return false;
37+
std::string_view::size_type FloatingGlobEndsAt(const std::string_view haystack,
38+
const std::string_view needle,
39+
unsigned globs);
40+
41+
// Check of pattern matches at the beginning of str.
42+
inline std::string_view::size_type AnchoredGlobEndsAt(const std::string_view str,
43+
const std::string_view pattern,
44+
unsigned globs) {
45+
if (str.size() + globs < pattern.size()) {
46+
return str.npos;
47+
}
48+
49+
for (std::string_view::size_type idx = 0; idx != pattern.size(); idx++) {
50+
if (pattern[idx] == '*') {
51+
auto result = FloatingGlobEndsAt(str.substr(idx), pattern.substr(idx + 1), globs - 1);
52+
if (result != str.npos) {
53+
// An empty suffix matches the whole string.
54+
result = result ? result + idx : str.size();
55+
}
56+
return result;
57+
} else if (idx >= str.size()) {
58+
return str.npos;
59+
} else if (pattern[idx] == '?') {
60+
continue;
61+
} else if (str[idx] != pattern[idx]) {
62+
return str.npos;
4563
}
4664
}
47-
return true;
65+
return pattern.size();;
4866
}
4967

50-
// Check if haystack contains needle, return true. needle may contain
51-
// '?' to match any character (just like glob).
52-
inline bool StringContainsWithWildcard(std::string_view haystack, std::string_view needle) {
53-
size_t needle_size = needle.size();
54-
if (needle_size == 0 || needle.front() == '?') {
55-
// The needle is empty, or begins with '?', fail in order to
56-
// force test to be fixed.
57-
return false;
58-
}
59-
if (needle_size > haystack.size()) {
60-
return false;
68+
// Look for needle in haystack. We look backwards through haystack, so
69+
// that glob use will find the longest match.
70+
inline std::string_view::size_type FloatingGlobEndsAt(const std::string_view haystack,
71+
const std::string_view needle,
72+
unsigned globs) {
73+
if (needle.empty()) {
74+
// Empty needle matches at end.
75+
return haystack.size();
6176
}
6277
char first = needle.front();
78+
if (first == '*') {
79+
// '*' at front, handle as an anchored glob.
80+
return AnchoredGlobEndsAt(haystack, needle, globs);
81+
}
82+
if (haystack.size() + globs < needle.size()) {
83+
return haystack.npos;
84+
}
6385

64-
for (size_t idx = 0, limit = haystack.size() - needle_size;
65-
(idx = haystack.find(first, idx)) <= limit; idx++) {
66-
std::string_view substr(&haystack[idx], needle_size);
67-
if (StringCompareWithWildcard(substr, needle)) {
68-
return true;
86+
for (std::string_view::size_type idx = haystack.size() + globs - needle.size();; idx--) {
87+
if (first != '?') {
88+
// no wildcard at front, scan for first char to begin search.
89+
idx = haystack.rfind(first, idx);
90+
if (idx == haystack.npos) {
91+
break;
92+
}
6993
}
94+
// Try an anchored match here.
95+
auto result = AnchoredGlobEndsAt(haystack.substr(idx), needle, globs);
96+
if (result != haystack.npos) {
97+
return result + idx;
98+
}
99+
if (!idx)
100+
break;
70101
}
71102

72-
return false;
103+
return haystack.npos;
104+
}
105+
106+
// Count the number of '*' characters.
107+
inline unsigned GlobCount(const std::string_view glob) {
108+
unsigned count = 0;
109+
for (std::string_view::size_type idx = 0; (idx = glob.find('*', idx)) != glob.npos; idx++)
110+
count++;
111+
return count;
112+
}
113+
// str matches pattern, allowing '?' and '*' globbing.
114+
inline bool StringMatchesGlob(const std::string_view str, const std::string_view pattern) {
115+
return AnchoredGlobEndsAt(str, pattern, GlobCount(pattern)) == str.size();
116+
}
117+
118+
// haystack contains needle, allowing '?' and '*' globbing.
119+
inline bool StringContainsGlob(const std::string_view haystack, const std::string_view needle) {
120+
return FloatingGlobEndsAt(haystack, needle, GlobCount(needle)) != haystack.npos;
73121
}
74122

75123
// Check whether the given file contains a list of strings in any order. Doesn't check for
@@ -99,7 +147,7 @@ inline bool FileContainsAllStrings(string file_name, const std::vector<string> &
99147
// Check for all target strings in the current line
100148
std::vector<std::string_view> found_on_current_line;
101149
for (const auto &s : must_contain_set) {
102-
if (StringContainsWithWildcard(line, s)) {
150+
if (StringContainsGlob(line, s)) {
103151
found_on_current_line.push_back(s);
104152
}
105153
}
@@ -149,7 +197,7 @@ inline bool FileContainsAllStringsInOrder(string file_name, const std::vector<st
149197

150198
// Check for all target strings in the current line
151199
for (; !must_contain_queue.empty(); must_contain_queue.pop_front()) {
152-
if (!StringContainsWithWildcard(line, must_contain_queue.front())) {
200+
if (!StringContainsGlob(line, must_contain_queue.front())) {
153201
break;
154202
}
155203
}
@@ -190,7 +238,7 @@ inline bool FilesMatchesString(string file_name, const string& expected) {
190238
int line_num = 0;
191239
while (getline(file, line_a) && getline(expect_stream, line_b)) {
192240
line_num++;
193-
if (!StringCompareWithWildcard(line_a, line_b)) {
241+
if (!StringMatchesGlob(line_a, line_b)) {
194242
tt::log_info(
195243
tt::LogTest,
196244
"Test Error: Line {} of {} did not match expected:\n\t{}\n\t{}",
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
#include "command_queue_fixture.hpp"
6+
7+
#include <tt-metalium/allocator.hpp>
8+
#include <tt-metalium/core_coord.hpp>
9+
#include <tt-metalium/host_api.hpp>
10+
#include <tt-metalium/kernel.hpp>
11+
#include <tt-metalium/kernel_types.hpp>
12+
#include <gtest/gtest.h>
13+
#include "debug_tools_fixture.hpp"
14+
#include "debug_tools_test_utils.hpp"
15+
#include <watcher_server.hpp>
16+
#include <fmt/base.h>
17+
#include <string>
18+
#include <vector>
19+
20+
using namespace tt;
21+
using namespace tt::tt_metal;
22+
23+
namespace {
24+
void RunOneTest(WatcherFixture* fixture, IDevice* device, unsigned free) {
25+
static const char *const names[] =
26+
{"brisc", "ncrisc", "trisc0", "trisc1", "trisc2", "aerisc", "ierisc"};
27+
const std::string path = "tests/tt_metal/tt_metal/test_kernels/misc/watcher_stack.cpp";
28+
auto msg = [&](std::vector<std::string> &msgs, const char *cpu, unsigned free) {
29+
if (msgs.empty()) {
30+
msgs.push_back("Stack usage summary:");
31+
}
32+
msgs.push_back(fmt::format("{} highest stack usage: {} bytes free, on core "
33+
"* running kernel {} ({})",
34+
cpu, free, path, !free ? "OVERFLOW" : "Close to overflow"));
35+
};
36+
37+
// Set up program
38+
Program program = Program();
39+
CoreCoord coord = {0, 0};
40+
std::vector<uint32_t> compile_args{free};
41+
std::vector<string> expected;
42+
43+
CreateKernel(program, path, coord,
44+
DataMovementConfig{.processor = DataMovementProcessor::RISCV_0,
45+
.noc = NOC::RISCV_0_default,
46+
.compile_args = compile_args});
47+
msg(expected, names[0], free);
48+
49+
CreateKernel(program, path, coord,
50+
DataMovementConfig{.processor = DataMovementProcessor::RISCV_1,
51+
.noc = NOC::RISCV_1_default,
52+
.compile_args = compile_args});
53+
msg(expected, names[1], free);
54+
55+
CreateKernel(program, path, coord, ComputeConfig{.compile_args = compile_args});
56+
for (unsigned ix = 0; ix != 2; ix++) {
57+
msg(expected, names[2 + ix], free);
58+
}
59+
60+
// Also run on idle ethernet, if present
61+
auto const &inactive_eth_cores = device->get_inactive_ethernet_cores();
62+
if (!inactive_eth_cores.empty() && fixture->IsSlowDispatch()) {
63+
// Just pick the first core
64+
CoreCoord idle_coord = CoreCoord(*inactive_eth_cores.begin());
65+
CreateKernel(program, path, idle_coord,
66+
tt_metal::EthernetConfig{.eth_mode = Eth::IDLE, .noc = tt_metal::NOC::NOC_0,
67+
.processor = DataMovementProcessor::RISCV_0, .compile_args = compile_args});
68+
msg(expected, names[6], free);
69+
}
70+
71+
fixture->RunProgram(device, program, true);
72+
73+
EXPECT_TRUE(FileContainsAllStringsInOrder(fixture->log_file_name, expected));
74+
}
75+
76+
template<uint32_t Free>
77+
void RunTest(WatcherFixture* fixture, IDevice* device) {
78+
RunOneTest(fixture, device, Free);
79+
}
80+
81+
} // namespace
82+
83+
TEST_F(WatcherFixture, TestWatcherStackUsage0) {
84+
for (IDevice* device : this->devices_) {
85+
this->RunTestOnDevice(RunTest<0>, device);
86+
}
87+
}
88+
89+
TEST_F(WatcherFixture, TestWatcherStackUsage16) {
90+
for (IDevice* device : this->devices_) {
91+
this->RunTestOnDevice(RunTest<16>, device);
92+
}
93+
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
// Scribble on the stack to check stack usage detection.
6+
7+
#include "compile_time_args.h"
8+
#include <dev_mem_map.h>
9+
10+
static uint32_t get_stack_base() {
11+
#if defined(COMPILE_FOR_BRISC)
12+
return MEM_BRISC_STACK_TOP - MEM_BRISC_STACK_SIZE;
13+
#elif defined(COMPILE_FOR_NCRISC)
14+
return MEM_NCRISC_STACK_TOP - MEM_NCRISC_STACK_SIZE;
15+
#elif defined(COMPILE_FOR_IDLE_ERISC)
16+
#if COMPILE_FOR_IDLE_ERISC == 0
17+
return MEM_IERISC_STACK_TOP - MEM_IERISC_STACK_SIZE;
18+
#elif COMPILE_FOR_IDLE_ERISC == 1
19+
return MEM_SLAVE_IERISC_STACK_TOP - MEM_SLAVE_IERISC_STACK_SIZE;
20+
#else
21+
#error "idle erisc get_stack_base unknown"
22+
#endif
23+
#elif defined(COMPILE_FOR_TRISC)
24+
#if COMPILE_FOR_TRISC == 0
25+
return MEM_TRISC0_STACK_TOP - MEM_TRISC0_STACK_SIZE;
26+
#elif COMPILE_FOR_TRISC == 1
27+
return MEM_TRISC1_STACK_TOP - MEM_TRISC1_STACK_SIZE;
28+
#elif COMPILE_FOR_TRISC == 2
29+
return MEM_TRISC2_STACK_TOP - MEM_TRISC2_STACK_SIZE;
30+
#else
31+
#error "trisc get_stack_base unknown"
32+
#endif
33+
#else
34+
#error "get_stack_base unknown"
35+
#endif
36+
}
37+
38+
#if defined(COMPILE_FOR_TRISC)
39+
#include "compute_kernel_api/common.h"
40+
namespace NAMESPACE {
41+
void MAIN {
42+
#else
43+
void kernel_main() {
44+
#endif
45+
uint32_t usage = get_compile_time_arg_val (0);
46+
auto base = (uint32_t tt_l1_ptr *)get_stack_base();
47+
auto point = &base[usage/sizeof(uint32_t)];
48+
uint32_t *sp;
49+
asm ("mv %0,sp" : "=r"(sp));
50+
51+
// Do not scribble above stack pointer.
52+
if (sp > point)
53+
*point = 0;
54+
}
55+
#if defined(COMPILE_FOR_TRISC)
56+
} // namespace NAMESPACE
57+
#endif

0 commit comments

Comments
 (0)