Skip to content

Commit 44d7c65

Browse files
committed
tutorial: floating-point-emulation: fix bounds check
1 parent 5166ee9 commit 44d7c65

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

tutorials/floating-point-emulation/notebooks/02-Matmul-Fundamentals/02.01-MatmulFundamentals.ipynb

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -241,7 +241,7 @@
241 241
" auto [size_m, size_n] = tensor_c.shape();\n",
242 242
" auto size_k = tutorial::size<1>(tensor_a);\n",
243 243
"\n",
244-
" if (thread_row_idx > size_m || thread_col_idx > size_n) {\n",
244+
" if (thread_row_idx >= size_m || thread_col_idx >= size_n) {\n",
245 245
" return;\n",
246 246
" }\n",
247 247
"\n",
@@ -395,6 +395,9 @@
395 395
" thread_row_idx = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x\n",
396 396
" thread_col_idx = cuda.threadIdx.y + cuda.blockIdx.y * cuda.blockDim.y\n",
397 397
"\n",
398+
" if thread_row_idx >= m or thread_col_idx >= n:\n",
399+
" return\n",
400+
"\n",
398 401
" accumulator = 0.0\n",
399 402
"\n",
400 403
" # EXERCISE --> Complete the following implementation to compute the dot product between row 'thread_row_idx' of matrix A \n",

0 commit comments

Comments
 (0)