Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from models.demos.vision.segmentation.vanilla_unet.reference.model import UNet

VANILLA_UNET_L1_SMALL_SIZE = 12 * 8192
VANILLA_UNET_TRACE_SIZE = 256 * 1024
VANILLA_UNET_TRACE_SIZE = 258048
VANILLA_UNET_PCC_WH = 0.97700


Expand Down
2 changes: 1 addition & 1 deletion tests/tt_metal/distributed/test_end_to_end_eltwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ class MeshEndToEnd2x4TraceTests : public MeshDeviceFixtureBase {
MeshDeviceFixtureBase(Config{
.mesh_shape = MeshShape{2, 4},
.num_cqs = 2,
.trace_region_size = 3072, // 1024 per workload necessary
.trace_region_size = 12288, // 1024 per workload necessary
}) {}
};

Expand Down
5 changes: 3 additions & 2 deletions tt_metal/distributed/mesh_trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,10 @@ std::shared_ptr<MeshTraceBuffer> MeshTrace::create_empty_mesh_trace_buffer() {

void MeshTrace::populate_mesh_buffer(MeshCommandQueue& mesh_cq, std::shared_ptr<MeshTraceBuffer>& trace_buffer) {
uint64_t unpadded_size = trace_buffer->desc->total_trace_size;
auto num_banks = mesh_cq.device()->allocator()->get_num_banks(BufferType::DRAM);
size_t page_size = trace_dispatch::compute_interleaved_trace_buf_page_size(
unpadded_size, mesh_cq.device()->allocator()->get_num_banks(BufferType::DRAM));
size_t padded_size = round_up(unpadded_size, page_size);
unpadded_size, num_banks);
size_t padded_size = round_up(unpadded_size, page_size * num_banks);

const auto current_trace_buffers_size = mesh_cq.device()->get_trace_buffers_size();
mesh_cq.device()->set_trace_buffers_size(current_trace_buffers_size + padded_size);
Expand Down
10 changes: 8 additions & 2 deletions tt_metal/impl/allocator/allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,13 @@ void AllocatorImpl::validate_bank_assignments() const {

void AllocatorImpl::init_one_bank_per_channel() {
// DRAM bank is between unreserved start and trace_region start: UNRESERVED | DRAM BANK | TRACE REGION
DeviceAddr dram_bank_size = config_->dram_bank_size - config_->dram_unreserved_base - config_->trace_region_size;
TT_FATAL(
config_->trace_region_size % config_->num_dram_channels == 0,
"config_->trace_region_size {} should be multiple of config_->num_dram_channels {}",
config_->trace_region_size,
config_->num_dram_channels);
auto trace_region_size_per_bank = config_->trace_region_size / config_->num_dram_channels;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

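We should assert that `config_->trace_region_size % config_->num_dram_channels == 0`, since the per-bank trace region size is computed by integer division and any remainder would be silently dropped.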

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

    TT_FATAL(
        config_->trace_region_size % config_->num_dram_channels == 0,
        "config_->trace_region_size {} should be multiple of config_->num_dram_channels {}",
        config_->trace_region_size,
        config_->num_dram_channels);

I added the assert.
add assert

DeviceAddr dram_bank_size = config_->dram_bank_size - config_->dram_unreserved_base - trace_region_size_per_bank;
std::vector<int64_t> bank_offsets(config_->num_dram_channels);
for (uint32_t channel_id = 0; channel_id < config_->num_dram_channels; channel_id++) {
bank_offsets.at(channel_id) = static_cast<int32_t>(config_->dram_bank_offsets.at(channel_id));
Expand All @@ -57,7 +63,7 @@ void AllocatorImpl::init_one_bank_per_channel() {
trace_buffer_manager_ = std::make_unique<BankManager>(
BufferType::TRACE,
bank_offsets,
config_->trace_region_size,
trace_region_size_per_bank,
config_->dram_alignment,
dram_bank_size + config_->dram_unreserved_base,
config_->disable_interleaved);
Expand Down
5 changes: 4 additions & 1 deletion tt_metal/impl/allocator/l1_banking_allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,9 @@ AllocatorConfig L1BankingAllocator::generate_config(
// Tensix/Eth <-> Tensix/Eth src and dst addrs must be L1_ALIGNMENT aligned
const auto& logical_size = soc_desc.get_grid_size(CoreType::TENSIX);
const auto& compute_size = tt::get_compute_grid_size(device_id, num_hw_cqs, dispatch_core_config);

size_t trace_region_alignment = hal.get_alignment(HalMemType::DRAM) * soc_desc.get_num_dram_views();
size_t aligned_trace_region_size = (trace_region_size + trace_region_alignment - 1) / trace_region_alignment * trace_region_alignment;
AllocatorConfig config(
{.num_dram_channels = static_cast<size_t>(soc_desc.get_num_dram_views()),
.dram_bank_size = soc_desc.dram_view_size,
Expand All @@ -198,7 +201,7 @@ AllocatorConfig L1BankingAllocator::generate_config(
.worker_grid = CoreRangeSet(CoreRange(CoreCoord(0, 0), CoreCoord(logical_size.x - 1, logical_size.y - 1))),
.worker_l1_size = static_cast<size_t>(soc_desc.worker_l1_size),
.l1_small_size = align(l1_small_size, hal.get_alignment(HalMemType::DRAM)),
.trace_region_size = align(trace_region_size, hal.get_alignment(HalMemType::DRAM)),
.trace_region_size = aligned_trace_region_size,
.core_type_from_noc_coord_table = {}, // Populated later
.worker_log_to_virtual_routing_x = cluster.get_worker_logical_to_virtual_x(device_id),
.worker_log_to_virtual_routing_y = cluster.get_worker_logical_to_virtual_y(device_id),
Expand Down
Loading