Skip to content

Commit 042eea8

Browse files
committed
Fix: Harden Python binding error paths and shape-overflow guards
Add nk_size_mul_checked_ to types.h and route the shape-product and cdist size computations in python/{each,distance}.c through it, so a buffer reporting an overflowing shape raises OverflowError instead of wrapping into an undersized allocation walked at the true extent. The elementwise scalar-array and array-array paths returned NULL without setting an exception when the buffer dtype is unsupported (a CPython protocol violation); they now raise TypeError. The packed and symmetric matrix verbs leaked the owned output tensor on an invalid row range; they now Py_DECREF it. The DLPack exporter propagates a PyObject_IsTrue error instead of treating it as a truthy `copy`, and the DLPack importer nulls owner->managed if PyCapsule_SetName fails, avoiding a double-free of the producer tensor.
1 parent 2e3db50 commit 042eea8

5 files changed

Lines changed: 81 additions & 16 deletions

File tree

include/numkong/types.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1748,6 +1748,14 @@ NK_PUBLIC nk_size_t nk_size_round_up_to_multiple_(nk_size_t number, nk_size_t di
17481748
return nk_size_divide_round_up_(number, divisor) * divisor;
17491749
}
17501750

1751+
/** @brief Multiplies two sizes with overflow detection. Writes the product and returns 1 on success;
1752+
* returns 0 (leaving @p product unchanged) when @p a * @p b would overflow `nk_size_t`. */
1753+
NK_PUBLIC int nk_size_mul_checked_(nk_size_t a, nk_size_t b, nk_size_t *product) NK_STREAMING_COMPATIBLE_ {
1754+
if (b != 0 && a > NK_SIZE_MAX / b) return 0;
1755+
*product = a * b;
1756+
return 1;
1757+
}
1758+
17511759
NK_INTERNAL nk_f32_t nk_f32_abs_(nk_f32_t x) { return x < 0 ? -x : x; }
17521760
NK_INTERNAL nk_f64_t nk_f64_abs_(nk_f64_t x) { return x < 0 ? -x : x; }
17531761
NK_INTERNAL nk_i64_t nk_i64_abs_(nk_i64_t x) { return x < 0 ? -x : x; }

python/distance.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -815,7 +815,11 @@ static PyObject *implement_cdist( //
815815
goto cleanup;
816816
}
817817

818-
size_t const count_pairs = a_parsed.rows * b_parsed.rows;
818+
size_t count_pairs;
819+
if (!nk_size_mul_checked_(a_parsed.rows, b_parsed.rows, &count_pairs)) {
820+
PyErr_SetString(PyExc_OverflowError, "cdist result size (a_rows * b_rows) overflows size_t");
821+
goto cleanup;
822+
}
819823
char *distances_start = NULL;
820824
size_t distances_rows_stride_bytes = 0;
821825
size_t distances_cols_stride_bytes = 0;

python/dlpack_interop.c

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -271,10 +271,16 @@ static int nk_fill_dl_tensor(Tensor *tensor, DLTensor *out, nk_dlpack_export_ctx
271271
if (!versioned && (tensor->dtype == nk_e2m3_k || tensor->dtype == nk_e3m2_k || tensor->dtype == nk_e2m1_k)) {
272272
PyErr_SetString( //
273273
PyExc_TypeError,
274-
"Sub-byte FP4/FP6 (e2m1/e2m3/e3m2) DLPack export requires max_version >= (1, 0) so " "the "
275-
"IS_SUBBYTE_TYPE_"
276-
"PADDED flag can "
277-
"be set");
274+
"Sub-byte FP4/FP6 (e2m1/e2m3/e3m2) DLPack export requires max_version >= (1, 0) so " "the " "IS_"
275+
"SUBBYTE_"
276+
"TYPE_" "PA"
277+
"DD"
278+
"ED"
279+
" f"
280+
"la"
281+
"g "
282+
"ca"
283+
"n " "be set");
278284
}
279285
else { PyErr_Format(PyExc_TypeError, "dtype %d has no DLPack mapping", (int)tensor->dtype); }
280286
return -1;
@@ -393,7 +399,9 @@ PyObject *Tensor_dlpack(PyObject *self, PyObject *args, PyObject *kwargs) {
393399

394400
Tensor *tensor = (Tensor *)self;
395401

396-
if (copy != Py_None && PyObject_IsTrue(copy)) {
402+
int const requested_copy = (copy != Py_None) ? PyObject_IsTrue(copy) : 0;
403+
if (requested_copy < 0) return NULL; // bool(copy) raised
404+
if (requested_copy) {
397405
PyErr_SetString(PyExc_BufferError, "NumKong DLPack exporter only supports zero-copy (copy=False)");
398406
return NULL;
399407
}
@@ -630,13 +638,24 @@ PyObject *api_from_dlpack(PyObject *self, PyObject *obj) {
630638
nk_dlpack_owner_t *owner = PyObject_New(nk_dlpack_owner_t, &DLPackOwnerType);
631639
if (!owner) goto fail;
632640
owner->is_versioned = is_versioned;
641+
// Rename the capsule so the producer's destructor becomes a no-op and ownership transfers to `owner`.
642+
// If the rename fails, leave the capsule's original name so the producer still frees it, and null
643+
// `owner->managed` so our dealloc does not free it a second time.
633644
if (is_versioned) {
634645
owner->managed = PyCapsule_GetPointer(capsule, "dltensor_versioned");
635-
PyCapsule_SetName(capsule, "used_dltensor_versioned");
646+
if (PyCapsule_SetName(capsule, "used_dltensor_versioned") != 0) {
647+
owner->managed = NULL;
648+
Py_DECREF(owner);
649+
goto fail;
650+
}
636651
}
637652
else {
638653
owner->managed = PyCapsule_GetPointer(capsule, "dltensor");
639-
PyCapsule_SetName(capsule, "used_dltensor");
654+
if (PyCapsule_SetName(capsule, "used_dltensor") != 0) {
655+
owner->managed = NULL;
656+
Py_DECREF(owner);
657+
goto fail;
658+
}
640659
}
641660

642661
// Build the Tensor view directly (parallels Tensor_view_object in tensor.c,

python/each.c

Lines changed: 40 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -536,7 +536,11 @@ static PyObject *add_scalar_array(PyObject *array_obj, PyObject *scalar_obj, PyO
536536

537537
nk_dtype_t dtype = resolve_nk_dtype_in_py_buffer(&a_buffer);
538538
if (out_dtype_obj) { dtype = py_object_to_nk_dtype(out_dtype_obj); }
539-
if (dtype == nk_dtype_unknown_k) goto cleanup;
539+
if (dtype == nk_dtype_unknown_k) {
540+
if (!PyErr_Occurred())
541+
PyErr_SetString(PyExc_TypeError, "unsupported buffer dtype for the requested elementwise operation");
542+
goto cleanup;
543+
}
540544

541545
nk_each_scale_punned_t scale_kernel = NULL;
542546
nk_capability_t capability = nk_cap_serial_k;
@@ -555,7 +559,11 @@ static PyObject *add_scalar_array(PyObject *array_obj, PyObject *scalar_obj, PyO
555559

556560
size_t const element_size = nk_dtype_bytes_per_value(dtype);
557561
size_t total_elements = 1;
558-
for (int dim = 0; dim < a_buffer.ndim; dim++) total_elements *= (size_t)a_buffer.shape[dim];
562+
for (int dim = 0; dim < a_buffer.ndim; dim++)
563+
if (!nk_size_mul_checked_(total_elements, (size_t)a_buffer.shape[dim], &total_elements)) {
564+
PyErr_SetString(PyExc_OverflowError, "tensor element count overflows size_t");
565+
goto cleanup;
566+
}
559567

560568
char *result_data = NULL;
561569
Py_ssize_t result_strides[NK_TENSOR_MAX_RANK];
@@ -655,7 +663,11 @@ static PyObject *add_array_array(PyObject *a_obj, PyObject *b_obj, PyObject *out
655663
}
656664

657665
if (out_dtype_obj) { dtype = py_object_to_nk_dtype(out_dtype_obj); }
658-
if (dtype == nk_dtype_unknown_k) goto cleanup;
666+
if (dtype == nk_dtype_unknown_k) {
667+
if (!PyErr_Occurred())
668+
PyErr_SetString(PyExc_TypeError, "unsupported buffer dtype for the requested elementwise operation");
669+
goto cleanup;
670+
}
659671

660672
nk_each_sum_punned_t sum_kernel = NULL;
661673
nk_capability_t capability = nk_cap_serial_k;
@@ -668,7 +680,11 @@ static PyObject *add_array_array(PyObject *a_obj, PyObject *b_obj, PyObject *out
668680

669681
int const num_dims = a_buffer.ndim;
670682
size_t total_elements = 1;
671-
for (int dim = 0; dim < num_dims; dim++) total_elements *= (size_t)a_buffer.shape[dim];
683+
for (int dim = 0; dim < num_dims; dim++)
684+
if (!nk_size_mul_checked_(total_elements, (size_t)a_buffer.shape[dim], &total_elements)) {
685+
PyErr_SetString(PyExc_OverflowError, "tensor element count overflows size_t");
686+
goto cleanup;
687+
}
672688

673689
a_promoted = ensure_contiguous_buffer(a_buffer.buf, a_dtype, dtype, num_dims, a_buffer.shape, a_buffer.strides,
674690
total_elements, &a_needs_free);
@@ -827,7 +843,11 @@ static PyObject *multiply_scalar_array(PyObject *array_obj, PyObject *scalar_obj
827843

828844
nk_dtype_t dtype = resolve_nk_dtype_in_py_buffer(&a_buffer);
829845
if (out_dtype_obj) { dtype = py_object_to_nk_dtype(out_dtype_obj); }
830-
if (dtype == nk_dtype_unknown_k) goto cleanup;
846+
if (dtype == nk_dtype_unknown_k) {
847+
if (!PyErr_Occurred())
848+
PyErr_SetString(PyExc_TypeError, "unsupported buffer dtype for the requested elementwise operation");
849+
goto cleanup;
850+
}
831851

832852
nk_each_scale_punned_t scale_kernel = NULL;
833853
nk_capability_t capability = nk_cap_serial_k;
@@ -846,7 +866,11 @@ static PyObject *multiply_scalar_array(PyObject *array_obj, PyObject *scalar_obj
846866

847867
size_t const element_size = nk_dtype_bytes_per_value(dtype);
848868
size_t total_elements = 1;
849-
for (int dim = 0; dim < a_buffer.ndim; dim++) total_elements *= (size_t)a_buffer.shape[dim];
869+
for (int dim = 0; dim < a_buffer.ndim; dim++)
870+
if (!nk_size_mul_checked_(total_elements, (size_t)a_buffer.shape[dim], &total_elements)) {
871+
PyErr_SetString(PyExc_OverflowError, "tensor element count overflows size_t");
872+
goto cleanup;
873+
}
850874

851875
char *result_data = NULL;
852876
Py_ssize_t result_strides[NK_TENSOR_MAX_RANK];
@@ -946,7 +970,11 @@ static PyObject *multiply_array_array(PyObject *a_obj, PyObject *b_obj, PyObject
946970
}
947971

948972
if (out_dtype_obj) { dtype = py_object_to_nk_dtype(out_dtype_obj); }
949-
if (dtype == nk_dtype_unknown_k) goto cleanup;
973+
if (dtype == nk_dtype_unknown_k) {
974+
if (!PyErr_Occurred())
975+
PyErr_SetString(PyExc_TypeError, "unsupported buffer dtype for the requested elementwise operation");
976+
goto cleanup;
977+
}
950978

951979
nk_each_fma_punned_t fma_kernel = NULL;
952980
nk_capability_t capability = nk_cap_serial_k;
@@ -965,7 +993,11 @@ static PyObject *multiply_array_array(PyObject *a_obj, PyObject *b_obj, PyObject
965993

966994
int const num_dims = a_buffer.ndim;
967995
size_t total_elements = 1;
968-
for (int dim = 0; dim < num_dims; dim++) total_elements *= (size_t)a_buffer.shape[dim];
996+
for (int dim = 0; dim < num_dims; dim++)
997+
if (!nk_size_mul_checked_(total_elements, (size_t)a_buffer.shape[dim], &total_elements)) {
998+
PyErr_SetString(PyExc_OverflowError, "tensor element count overflows size_t");
999+
goto cleanup;
1000+
}
9691001

9701002
a_promoted = ensure_contiguous_buffer(a_buffer.buf, a_dtype, dtype, num_dims, a_buffer.shape, a_buffer.strides,
9711003
total_elements, &a_needs_free);

python/matrix.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,7 @@ static PyObject *api_packed_common( //
454454
if (end_row < 0) end_row = (Py_ssize_t)height;
455455
if (start_row > (Py_ssize_t)height || end_row > (Py_ssize_t)height || start_row > end_row) {
456456
PyBuffer_Release(&a_buffer);
457+
if (owns_result) Py_DECREF(result);
457458
PyErr_Format(PyExc_ValueError, "Invalid row range [%zd, %zd) for matrix with %zu rows", start_row, end_row,
458459
(size_t)height);
459460
return NULL;
@@ -597,6 +598,7 @@ static PyObject *api_symmetric_common( //
597598
if (row_start > n_vectors || row_end > n_vectors || row_start > row_end) {
598599
PyErr_Format(PyExc_ValueError, "Invalid row range [%zu, %zu) for %zu vectors", (size_t)row_start,
599600
(size_t)row_end, (size_t)n_vectors);
601+
if (owns_result) Py_DECREF(result);
600602
goto cleanup;
601603
}
602604
nk_size_t row_count_val = (nk_size_t)(row_end - row_start);

0 commit comments

Comments
 (0)