Skip to content

Commit 02d0e90

Browse files
Add support for indexed arrays in structs and related kernels
Signed-off-by: Fabien Péan <pean@virtonomy.io>
1 parent 32d214d commit 02d0e90

File tree

5 files changed

+233
-0
lines changed

5 files changed

+233
-0
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
factorization and solve, improving memory access patterns and eliminating shared memory
2121
bank conflicts at power-of-2 tile sizes
2222
([GH-1318](https://github.com/NVIDIA/warp/issues/1318)).
23+
- Add support for `wp.indexedarray` fields in `@wp.struct` (assignment, device transfer, and NumPy structured values)
24+
([GH-1327](https://github.com/NVIDIA/warp/issues/1327)).
2325

2426
### Removed
2527

warp/_src/codegen.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,11 @@ def to(self, device):
317317
if matches_array_class(var.type, array):
318318
# array_t
319319
setattr(dst, name, value.to(device))
320+
elif matches_array_class(var.type, indexedarray):
321+
# indexedarray_t
322+
# `.to()` returns a plain array when the data is moved to a different device; wrap it in an indexedarray so the field keeps its indexedarray type
323+
cloned = value.to(device)
324+
setattr(dst, name, cloned if isinstance(cloned, indexedarray) else indexedarray(cloned))
320325
elif isinstance(var.type, Struct):
321326
# nested struct
322327
new_struct = var.type()
@@ -344,6 +349,9 @@ def numpy_value(self):
344349
if matches_array_class(var.type, array):
345350
# array_t
346351
npvalue.append(value.numpy_value())
352+
elif matches_array_class(var.type, indexedarray):
353+
# indexedarray_t
354+
npvalue.append(value.numpy_value())
347355
elif isinstance(var.type, Struct):
348356
# nested struct
349357
npvalue.append(value.numpy_value())
@@ -379,6 +387,8 @@ def _make_struct_field_constructor(field: str, var_type: type):
379387
return lambda ctype: var_type.instance_type(ctype=getattr(ctype, field))
380388
elif matches_array_class(var_type, warp._src.types.array):
381389
return lambda ctype: None
390+
elif matches_array_class(var_type, warp._src.types.indexedarray):
391+
return lambda ctype: None
382392
elif _is_texture_type(var_type):
383393
return lambda ctype: None
384394
elif issubclass(var_type, ctypes.Array):
@@ -409,6 +419,19 @@ def set_array_value(inst, value):
409419

410420
cls.__setattr__(inst, field, value)
411421

422+
def set_indexedarray_value(inst, value):
    """Store an indexed array (or ``None``) in a struct field.

    ``field``, ``var_type`` and ``cls`` come from the enclosing scope. The
    ctypes mirror ``inst._ctype`` receives the descriptor returned by
    ``__ctype__()``, after which the Python-level attribute is set through
    ``cls.__setattr__``.
    """
    if value is not None:
        assert isinstance(value, indexedarray)
        assert types_equal(value.dtype, var_type.dtype), (
            f"assign to struct member variable {field} failed, expected type {type_repr(var_type.dtype)}, got type {type_repr(value.dtype)}"
        )
        descriptor_source = value
    else:
        # nothing assigned: use the annotated type's own descriptor (null pointers)
        descriptor_source = var_type

    setattr(inst._ctype, field, descriptor_source.__ctype__())
    cls.__setattr__(inst, field, value)
434+
412435
def set_struct_value(inst, value):
413436
getattr(inst, field).assign(value)
414437

@@ -468,6 +491,8 @@ def set_texture_value(inst, value):
468491

469492
if matches_array_class(var_type, array):
470493
return set_array_value
494+
elif matches_array_class(var_type, indexedarray):
495+
return set_indexedarray_value
471496
elif isinstance(var_type, Struct):
472497
return set_struct_value
473498
elif _is_texture_type(var_type):
@@ -498,6 +523,8 @@ def __init__(self, key: str, cls: type, module: warp._src.context.Module):
498523
for label, var in self.vars.items():
499524
if matches_array_class(var.type, array):
500525
fields.append((label, array_t))
526+
elif matches_array_class(var.type, indexedarray):
527+
fields.append((label, indexedarray_t))
501528
elif isinstance(var.type, Struct):
502529
fields.append((label, var.type.ctype))
503530
elif issubclass(var.type, ctypes.Array):
@@ -613,6 +640,9 @@ def numpy_dtype(self):
613640
if matches_array_class(var.type, array):
614641
# array_t
615642
formats.append(array_t.numpy_dtype())
643+
elif matches_array_class(var.type, indexedarray):
644+
# indexedarray_t
645+
formats.append(indexedarray_t.numpy_dtype())
616646
elif isinstance(var.type, Struct):
617647
# nested struct
618648
formats.append(var.type.numpy_dtype())
@@ -646,6 +676,9 @@ def from_ptr(self, ptr):
646676
# no easy way to make a backref.
647677
# Instead, we just create a stub annotation, which is not a fully usable array object.
648678
setattr(instance, name, array(dtype=var.type.dtype, ndim=var.type.ndim))
679+
elif matches_array_class(var.type, indexedarray):
680+
# Same as regular arrays: return an annotation stub only.
681+
setattr(instance, name, indexedarray(dtype=var.type.dtype, ndim=var.type.ndim))
649682
elif isinstance(var.type, Struct):
650683
# nested struct
651684
value = var.type.from_ptr(ptr + offset)

warp/_src/types.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2198,6 +2198,38 @@ def __init__(self, data, indices, shape):
21982198
self.indices[i] = ctypes.c_void_p(None)
21992199
self.shape[i] = shape[i]
22002200

2201+
# Structured dtype description used when an indexedarray_t is packed in a struct
# and shared via a NumPy structured array.
# Returns the class-level `_numpy_dtype_` attribute unchanged.
2202+
@classmethod
2203+
def numpy_dtype(cls):
2204+
return cls._numpy_dtype_
2205+
2206+
# Structured value used when indexedarray_t is packed in a struct and shared via a NumPy structured array.
def numpy_value(self):
    """Return this descriptor as a ``(data, indices, shape)`` tuple.

    ``data`` is whatever ``self.data.numpy_value()`` returns, ``indices`` is a
    list of ``ARRAY_MAX_DIMS`` integers (null entries become 0) and ``shape``
    is ``self.shape`` copied into a list.
    """

    def as_address(entry):
        # pointers are represented as unsigned 64-bit integers;
        # an entry may be a ctypes.c_void_p instance or an already-unwrapped value
        raw = entry.value if isinstance(entry, ctypes.c_void_p) else entry
        return 0 if raw is None else int(raw)

    index_addresses = [as_address(self.indices[dim]) for dim in range(ARRAY_MAX_DIMS)]

    return (self.data.numpy_value(), index_addresses, list(self.shape))
2219+
2220+
2221+
# NOTE: must match indexedarray_t._fields_
# Offsets and item size are read from the ctypes structure itself.
indexedarray_t._numpy_dtype_ = dict(
    names=["data", "indices", "shape"],
    formats=[array_t.numpy_dtype(), f"{ARRAY_MAX_DIMS}u8", f"{ARRAY_MAX_DIMS}i4"],
    offsets=[getattr(indexedarray_t, member).offset for member in ("data", "indices", "shape")],
    itemsize=ctypes.sizeof(indexedarray_t),
)
2232+
22012233

22022234
class tuple_t:
22032235
"""Used during codegen to store multiple values into a single variable."""

warp/native/array.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1275,6 +1275,10 @@ CUDA_CALLABLE inline void adj_where(
12751275
// atomic add the whole struct onto an array (e.g.: during backwards pass)
12761276
template <typename T> CUDA_CALLABLE inline void atomic_add(array_t<T>*, array_t<T>) { }
12771277

1278+
// Stub for the case where a struct contains an indexed array and the whole
1279+
// struct is atomically added onto an array (e.g. during the backward pass).
// The body is intentionally empty: nothing is accumulated for the indexed array member.
1280+
template <typename T> CUDA_CALLABLE inline void atomic_add(indexedarray_t<T>*, indexedarray_t<T>) { }
1281+
12781282
// for float and vector types this is just an alias for an atomic add
12791283
template <typename T> CUDA_CALLABLE inline void adj_atomic_add(T* buf, T value) { atomic_add(buf, value); }
12801284

warp/tests/test_indexedarray.py

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,152 @@ def test_indexedarray_1d(test, device):
4242
wp.launch(kernel_1d, dim=iarr.size, inputs=[iarr, expected_arr], device=device)
4343

4444

45+
# Warp struct with a single 1-field payload: an indexed array of floats.
# Used by the tests below to exercise indexedarray fields inside @wp.struct.
@wp.struct
46+
class IndexedArrayStruct:
47+
# indexed array of floats stored as a struct member
iarr: wp.indexedarray(dtype=float)
48+
49+
50+
# Warp struct that nests IndexedArrayStruct, so the indexed array is reached
# through one extra level of struct membership (`outer.inner.iarr`).
@wp.struct
50+
class NestedIndexedArrayStruct:
51+
# nested struct holding the indexed array
inner: IndexedArrayStruct
53+
54+
55+
# Kernel: reads, overwrites and atomically updates an indexed array that is a
# struct member, checking each step against `expected`.
@wp.kernel
56+
def kernel_indexedarray_in_struct(arg: IndexedArrayStruct, expected: wp.array(dtype=float)):
57+
i = wp.tid()
58+
59+
# the value seen through the indexed array must match the reference value
wp.expect_eq(arg.iarr[i], expected[i])
60+
61+
# write through the struct member: plain store followed by an atomic add
arg.iarr[i] = 2.0 * arg.iarr[i]
62+
wp.atomic_add(arg.iarr, i, 1.0)
63+
64+
# both writes must be visible when reading back: 2 * expected + 1
wp.expect_eq(arg.iarr[i], 2.0 * expected[i] + 1.0)
65+
66+
67+
# Kernel: same checks as the single-struct kernel, but the indexed array is
# reached through a nested struct (`arg.inner.iarr`).
@wp.kernel
68+
def kernel_indexedarray_in_nested_struct(arg: NestedIndexedArrayStruct, expected: wp.array(dtype=float)):
69+
i = wp.tid()
70+
71+
# the value seen through the nested indexed array must match the reference value
wp.expect_eq(arg.inner.iarr[i], expected[i])
72+
73+
# write through the nested member: plain store followed by an atomic add
arg.inner.iarr[i] = 2.0 * arg.inner.iarr[i]
74+
wp.atomic_add(arg.inner.iarr, i, 1.0)
75+
76+
# both writes must be visible when reading back: 2 * expected + 1
wp.expect_eq(arg.inner.iarr[i], 2.0 * expected[i] + 1.0)
77+
78+
79+
# Kernel: takes an array of structs and exercises the indexed array held by
# the first element, checking each step against `expected`.
@wp.kernel
80+
def kernel_indexedarray_in_struct_array(args: wp.array(dtype=IndexedArrayStruct), expected: wp.array(dtype=float)):
81+
i = wp.tid()
82+
83+
# every thread loads the struct stored at index 0 of the array
s = args[0]
84+
wp.expect_eq(s.iarr[i], expected[i])
85+
86+
# write through the loaded struct's member: plain store followed by an atomic add
s.iarr[i] = 2.0 * s.iarr[i]
87+
wp.atomic_add(s.iarr, i, 1.0)
88+
89+
# both writes must be visible when reading back: 2 * expected + 1
wp.expect_eq(s.iarr[i], 2.0 * expected[i] + 1.0)
90+
91+
92+
def test_indexedarray_in_struct(test, device):
    """Launch a kernel that reads and writes an indexed array held in a struct field."""
    base = wp.array(data=np.arange(10, dtype=np.float32), device=device)
    odd_indices = wp.array([1, 3, 5, 7, 9], dtype=int, device=device)
    view = wp.indexedarray1d(base, [odd_indices])

    # values the kernel compares against before it modifies the view
    reference = wp.array(data=[1, 3, 5, 7, 9], dtype=float, device=device)

    holder = IndexedArrayStruct()
    holder.iarr = view

    wp.launch(kernel_indexedarray_in_struct, dim=view.size, inputs=[holder, reference], device=device)
    wp.synchronize_device(device)
106+
107+
108+
def test_indexedarray_in_nested_struct(test, device):
    """Launch a kernel that accesses an indexed array through a nested struct."""
    base = wp.array(data=np.arange(10, dtype=np.float32), device=device)
    odd_indices = wp.array([1, 3, 5, 7, 9], dtype=int, device=device)
    view = wp.indexedarray1d(base, [odd_indices])

    # values the kernel compares against before it modifies the view
    reference = wp.array(data=[1, 3, 5, 7, 9], dtype=float, device=device)

    # build the inner struct first, then assign it to the outer one
    leaf = IndexedArrayStruct()
    leaf.iarr = view

    wrapper = NestedIndexedArrayStruct()
    wrapper.inner = leaf

    wp.launch(kernel_indexedarray_in_nested_struct, dim=view.size, inputs=[wrapper, reference], device=device)
    wp.synchronize_device(device)
125+
126+
127+
def test_indexedarray_in_struct_array(test, device):
    """Launch a kernel on an array of structs whose element holds an indexed array."""
    base = wp.array(data=np.arange(10, dtype=np.float32), device=device)
    odd_indices = wp.array([1, 3, 5, 7, 9], dtype=int, device=device)
    view = wp.indexedarray1d(base, [odd_indices])

    # values the kernel compares against before it modifies the view
    reference = wp.array(data=[1, 3, 5, 7, 9], dtype=float, device=device)

    holder = IndexedArrayStruct()
    holder.iarr = view
    # single-element array of structs passed to the kernel
    packed = wp.array([holder], dtype=IndexedArrayStruct, device=device)

    wp.launch(kernel_indexedarray_in_struct_array, dim=view.size, inputs=[packed, reference], device=device)
    wp.synchronize_device(device)
142+
143+
144+
def test_indexedarray_in_struct_numpy(test, device):
    """Check that NumPy dtype/value export works for a struct embedding an indexed array."""
    base = wp.array(data=np.arange(4, dtype=np.float32), device=device)
    picks = wp.array([0, 2], dtype=int, device=device)
    view = wp.indexedarray1d(base, [picks])

    holder = IndexedArrayStruct()
    holder.iarr = view

    # Smoke test only: both exports must be callable for structs embedding indexedarray_t
    struct_dtype = IndexedArrayStruct.numpy_dtype()
    struct_value = holder.numpy_value()

    test.assertIsInstance(struct_dtype, dict)
    test.assertEqual(struct_dtype["names"], ["iarr"])
    test.assertEqual(len(struct_value), 1)
161+
162+
163+
def test_indexedarray_in_struct_to_device_transfer(test, device):
    """Move a struct holding a CPU indexed array to a CUDA device and run a kernel on it."""
    # Only meaningful when the target device is a CUDA device.
    if not (wp.is_cuda_available() and wp.get_device(device).is_cuda):
        test.skipTest("Requires CUDA")

    # Build the indexed array on the CPU first.
    host_data = wp.array(data=np.arange(10, dtype=np.float32), device="cpu")
    host_indices = wp.array([1, 3, 5, 7, 9], dtype=int, device="cpu")
    host_view = wp.indexedarray1d(host_data, [host_indices])

    host_struct = IndexedArrayStruct()
    host_struct.iarr = host_view

    # Transfer the whole struct; the field must remain an indexedarray with no index arrays.
    device_struct = host_struct.to(device)
    test.assertIsInstance(device_struct.iarr, wp.indexedarray)
    test.assertTrue(all(x is None for x in device_struct.iarr.indices))
    test.assertEqual(device_struct.iarr.shape, host_view.shape)

    gathered = np.array([1, 3, 5, 7, 9], dtype=np.float32)
    reference = wp.array(data=gathered, dtype=float, device=device)

    wp.launch(
        kernel_indexedarray_in_struct,
        dim=device_struct.iarr.size,
        inputs=[device_struct, reference],
        device=device,
    )
    # After the kernel: a[i] = 2*a[i] then atomic_add(a, i, 1) => 2*expected + 1
    result = device_struct.iarr.numpy()
    assert_np_equal(result, 2.0 * gathered + 1.0)
189+
190+
45191
@wp.kernel
46192
def kernel_2d(a: wp.indexedarray2d(dtype=float), expected: wp.array2d(dtype=float)):
47193
i, j = wp.tid()
@@ -1121,6 +1267,22 @@ class TestIndexedArray(unittest.TestCase):
11211267
add_function_test(TestIndexedArray, "test_indexedarray_fill_vector", test_indexedarray_fill_vector, devices=devices)
11221268
add_function_test(TestIndexedArray, "test_indexedarray_fill_matrix", test_indexedarray_fill_matrix, devices=devices)
11231269
add_function_test(TestIndexedArray, "test_indexedarray_fill_struct", test_indexedarray_fill_struct, devices=devices)
1270+
add_function_test(TestIndexedArray, "test_indexedarray_in_struct", test_indexedarray_in_struct, devices=devices)
1271+
add_function_test(
1272+
TestIndexedArray, "test_indexedarray_in_nested_struct", test_indexedarray_in_nested_struct, devices=devices
1273+
)
1274+
add_function_test(
1275+
TestIndexedArray, "test_indexedarray_in_struct_array", test_indexedarray_in_struct_array, devices=devices
1276+
)
1277+
add_function_test(
1278+
TestIndexedArray, "test_indexedarray_in_struct_numpy", test_indexedarray_in_struct_numpy, devices=devices
1279+
)
1280+
add_function_test(
1281+
TestIndexedArray,
1282+
"test_indexedarray_in_struct_to_device_transfer",
1283+
test_indexedarray_in_struct_to_device_transfer,
1284+
devices=devices,
1285+
)
11241286

11251287

11261288
if __name__ == "__main__":

0 commit comments

Comments
 (0)