Skip to content

Commit cbf82e7

Browse files
arikTTnathan-TTCopilot
authored
Enable (very) basic compute kernels (#37328)
### Ticket N/A ### Problem description Quasar wasn't running any compute kernels. ### What's changed This PR adds compute FW and support to run compute kernels on Neo0 triscs. It only allows running basic code on the trisc and doesn't support any real compute workloads. That work is included in a different branch that has LLK API changes etc. The goal of this PR is to unblock others who need to test features that relate to triscs. ### Checklist - [x] [![All post-commit tests](https://github.com/tenstorrent/tt-metal/actions/workflows/all-post-commit-workflows.yaml/badge.svg?branch=ayaacob/compute_fw)](https://github.com/tenstorrent/tt-metal/actions/runs/21767356986) - [x] [![Blackhole Post commit](https://github.com/tenstorrent/tt-metal/actions/workflows/blackhole-post-commit.yaml/badge.svg?branch=ayaacob/compute_fw)](https://github.com/tenstorrent/tt-metal/actions/runs/21767228933) - [x] New/Existing tests provide coverage for changes - added a test that runs a simple compute kernel that adds numbers on RISC-V and prints the result. --------- Co-authored-by: Nathan Sidwell <nsidwell@tenstorrent.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 2e312df commit cbf82e7

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+9480
-5797
lines changed

tests/tt_metal/tt_metal/debug_tools/watcher/test_pause.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ void RunTest(MeshWatcherFixture* fixture, const std::shared_ptr<distributed::Mes
136136
for (uint32_t x = xy_start.x; x <= xy_end.x; x++) {
137137
for (uint32_t y = xy_start.y; y <= xy_end.y; y++) {
138138
CoreCoord virtual_core = device->worker_core_from_logical_core({x, y});
139-
for (const auto& risc_str : {" brisc", "ncrisc", "trisc0", "trisc1", "trisc2"}) {
139+
for (const auto& risc_str : {"BRISC", "NCRISC", "TRISC0", "TRISC1", "TRISC2"}) {
140140
std::string expected = fmt::format("{}:{}", virtual_core.str(), risc_str);
141141
expected_strings.push_back(expected);
142142
}

tests/tt_metal/tt_metal/debug_tools/watcher/test_stack_usage.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ using namespace tt::tt_metal;
2020
namespace {
2121
void RunOneTest(
2222
MeshWatcherFixture* fixture, const std::shared_ptr<distributed::MeshDevice>& mesh_device, unsigned free) {
23-
static const char* const names[] = {"brisc", "ncrisc", "trisc0", "trisc1", "trisc2", "aerisc", "ierisc"};
23+
static const char* const names[] = {"BRISC", "NCRISC", "TRISC0", "TRISC1", "TRISC2", "aerisc", "ierisc"};
2424
const std::string path = "tests/tt_metal/tt_metal/test_kernels/misc/watcher_stack.cpp";
2525
auto msg = [&](std::vector<std::string> &msgs, const char *cpu, unsigned free) {
2626
if (msgs.empty()) {

tests/tt_metal/tt_metal/sources.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ set(UNIT_TESTS_LEGACY_SRC
2424
test_multi_core_kernel.cpp
2525
test_multi_dm_add_two_ints.cpp
2626
test_multiple_programs.cpp
27+
test_quasar_basic_trisc.cpp
2728
test_sdpa_reduce_c.cpp
2829
test_single_dm_l1_write.cpp
2930
test_stress_noc_mcast.cpp
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// SPDX-FileCopyrightText: © 2026 Tenstorrent AI ULC
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
#include "api/debug/dprint.h"
6+
7+
void kernel_main() {
8+
#ifdef TRISC_PACK
9+
int32_t A = 1;
10+
int32_t B = 2;
11+
12+
DPRINT << "TEST packer" << ENDL();
13+
DPRINT << A + B << ENDL();
14+
#endif
15+
16+
#ifdef TRISC_UNPACK
17+
int32_t A = 2;
18+
int32_t B = 2;
19+
20+
DPRINT << "TEST unpacker" << ENDL();
21+
DPRINT << A + B << ENDL();
22+
#endif
23+
24+
#ifdef TRISC_MATH
25+
int32_t A = 3;
26+
int32_t B = 2;
27+
28+
DPRINT << "TEST math" << ENDL();
29+
DPRINT << A + B << ENDL();
30+
#endif
31+
}

tests/tt_metal/tt_metal/test_kernels/dataflow/reader_bmm_8bank.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99

1010
void kernel_main() {
1111
// same arg indices as in reader_binary_diff_lengths for compat
12-
uint32_t src0_addr = get_arg_val<uint32_t>(0);
13-
uint32_t src1_addr = get_arg_val<uint32_t>(1);
12+
uintptr_t src0_addr = get_arg_val<uint32_t>(0);
13+
uintptr_t src1_addr = get_arg_val<uint32_t>(1);
1414
uint32_t Mt = get_arg_val<uint32_t>(2);
1515
uint32_t Kt = get_arg_val<uint32_t>(3);
1616
uint32_t Nt = get_arg_val<uint32_t>(4);

tests/tt_metal/tt_metal/test_kernels/dataflow/writer_bmm_8bank.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
void kernel_main() {
1111
// same arg indices as in reader_bmm_8bank for reuse
12-
uint32_t dst_addr = get_arg_val<uint32_t>(0);
12+
uintptr_t dst_addr = get_arg_val<uint32_t>(0);
1313
uint32_t Mt = get_arg_val<uint32_t>(2);
1414
uint32_t Nt = get_arg_val<uint32_t>(4);
1515
uint32_t batch = get_arg_val<uint32_t>(7);
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// SPDX-FileCopyrightText: © 2026 Tenstorrent AI ULC
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
#include "common/device_fixture.hpp"
6+
7+
#include <tt-metalium/device.hpp>
8+
#include <tt-metalium/distributed.hpp>
9+
#include <tt-metalium/host_api.hpp>
10+
#include <tt-metalium/experimental/host_api.hpp>
11+
#include <tt-metalium/tt_metal.hpp>
12+
13+
#ifndef OVERRIDE_KERNEL_PREFIX
14+
#define OVERRIDE_KERNEL_PREFIX ""
15+
#endif
16+
17+
using namespace tt;
18+
using namespace tt::tt_metal;
19+
20+
// This test requires simulator environment
21+
TEST_F(MeshDeviceSingleCardFixture, QuasarBasicTrisc) {
22+
// Skip if simulator is not available
23+
char* env_var = std::getenv("TT_METAL_SIMULATOR");
24+
if (env_var == nullptr) {
25+
GTEST_SKIP() << "This test can only be run using a simulator. Set TT_METAL_SIMULATOR environment variable.";
26+
}
27+
28+
auto mesh_device = devices_[0];
29+
30+
env_var = std::getenv("TT_METAL_DPRINT_CORES");
31+
if (env_var == nullptr) {
32+
std::cerr << "WARNING: Please set the environment variable TT_METAL_DPRINT_CORES to 0,0 to see the output of "
33+
"the Data Movement kernels."
34+
<< std::endl;
35+
std::cerr << "WARNING: For example, export TT_METAL_DPRINT_CORES=0,0" << std::endl;
36+
}
37+
38+
// We are going to use the first device (0) and the first core (0, 0) on the device.
39+
constexpr CoreCoord core = {0, 0};
40+
// Command queue lets us submit work (execute programs and read/write buffers) to the device.
41+
distributed::MeshCommandQueue& cq = mesh_device->mesh_command_queue();
42+
// Prepare a workload and a device coordinate range that spans the mesh.
43+
distributed::MeshWorkload workload;
44+
distributed::MeshCoordinateRange device_range = distributed::MeshCoordinateRange(mesh_device->shape());
45+
Program program = CreateProgram();
46+
47+
CreateKernel(
48+
program,
49+
OVERRIDE_KERNEL_PREFIX "tests/tt_metal/tt_metal/test_kernels/compute/risc_math.cpp",
50+
core,
51+
ComputeConfig{.compile_args = {}});
52+
53+
workload.add_program(device_range, std::move(program));
54+
distributed::EnqueueMeshWorkload(cq, workload, true);
55+
}

tests/tt_metal/tt_metal/test_single_dm_l1_write.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ TEST_F(MeshDeviceSingleCardFixture, SingleDmL1Write) {
6161
OVERRIDE_KERNEL_PREFIX "tests/tt_metal/tt_metal/test_kernels/dataflow/simple_l1_write.cpp",
6262
core,
6363
experimental::quasar::QuasarDataMovementConfig{
64-
.num_processors_per_cluster = 1, .named_compile_args = named_compile_time_args});
64+
.num_processors_per_cluster = 2, .named_compile_args = named_compile_time_args});
6565

6666
// Set Runtime Arguments for the Data Movement Kernel (memory address to write to)
6767
SetRuntimeArgs(program, data_movement_kernel_0, core, {address});

tt_metal/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ file(
160160
GLOB_RECURSE TT_LLK_HEADERS
161161
third_party/tt_llk/tt_llk_wormhole_b0/**/*.h
162162
third_party/tt_llk/tt_llk_blackhole/**/*.h
163+
third_party/tt_llk/tt_llk_quasar/**/*.h
163164
)
164165

165166
target_sources(

tt_metal/hw/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,9 @@ foreach(ARCH_CPU IN LISTS TLS_ARCH_CPUS)
457457
-mcpu=${CPU}
458458
${GPP_FLAGS_common}
459459
)
460+
if(CPU STREQUAL "tt-qsr32")
461+
list(APPEND GPP_FLAGS -DCOMPILE_FOR_TRISC)
462+
endif()
460463
add_custom_command(
461464
OUTPUT
462465
${HW_OBJ_DIR}/${HW_OBJ_FILE}

0 commit comments

Comments
 (0)