xsuite
diff --git a/tests/test_capi.py b/tests/test_capi.py
Lines changed: 150 additions & 66 deletions
diff --git a/tests/test_common.py b/tests/test_common.py
Lines changed: 11 additions & 9 deletions
@@ -158,6 +158,70 @@ def test_array_dynamic_type_init_get_set(array_cls, example_shape):
             assert arr[ii].field1[idx_in_field] == 13 * vv
 
 
+@for_all_test_contexts
+@pytest.mark.parametrize(
+    "array_type",
+    [
+        xo.UInt64[3, 5, 7],
+        xo.UInt64[:, :, :],
+        xo.UInt64[:, 5, :],
+    ],
+)
+def test_array_get_shape(test_context, array_type):
+    source = """
+        #include "xobjects/headers/common.h"
+
+        GPUKERN void get_nd_and_shape(
+            ARRAY_TYPE arr,
+            GPUGLMEM int64_t* out_nd,
+            GPUGLMEM int64_t* out_shape
+        ) {
+            *out_nd = ARRAY_TYPE_nd(arr);
+            ARRAY_TYPE_shape(arr, out_shape);
+        }
+    """.replace(
+        "ARRAY_TYPE", array_type.__name__
+    )
+
+    kernels = {
+        "get_nd_and_shape": xo.Kernel(
+            c_name="get_nd_and_shape",
+            args=[
+                xo.Arg(array_type, name="arr"),
+                xo.Arg(xo.Int64, pointer=True, name="out_nd"),
+                xo.Arg(xo.Int64, pointer=True, name="out_shape"),
+            ],
+        ),
+    }
+
+    test_context.add_kernels(
+        sources=[source],
+        kernels=kernels,
+    )
+
+    instance = array_type(
+        np.array(range(3 * 5 * 7)).reshape((3, 5, 7)),
+        _context=test_context,
+    )
+
+    expected_nd = 3
+    result_nd = test_context.zeros((1,), dtype=np.int64)
+
+    expected_shape = [3, 5, 7]
+    result_shape = test_context.zeros((expected_nd,), dtype=np.int64)
+
+    test_context.kernels.get_nd_and_shape(
+        arr=instance,
+        out_nd=result_nd,
+        out_shape=result_shape,
+    )
+
+    assert result_nd[0] == expected_nd
+    assert result_shape[0] == expected_shape[0]
+    assert result_shape[1] == expected_shape[1]
+    assert result_shape[2] == expected_shape[2]
+
+
 def test_struct1():
     kernels = Struct1._gen_kernels()
     ctx = xo.ContextCpu()
@@ -539,47 +603,47 @@ def test_getp1_dyn_length_dyn_type_string_array():
         assert ord(ffi.cast("char *", s2)[8 + ii]) == ch
 
 
-def test_gpu_api():
-    for ctx in xo.context.get_test_contexts():
-        src_code = """
-        /*gpufun*/
-        void myfun(double x, double y,
-            double* z){
-            z[0] = x * y;
-            }
+@for_all_test_contexts
+def test_gpu_api(test_context):
+    src_code = """
+    /*gpufun*/
+    void myfun(double x, double y,
+        double* z){
+        z[0] = x * y;
+        }
 
-        /*gpukern*/
-        void my_mul(const int n,
-            /*gpuglmem*/ const double* x1,
-            /*gpuglmem*/ const double* x2,
-            /*gpuglmem*/       double* y) {
-            int tid = 0 //vectorize_over tid n
-            double z;
-            myfun(x1[tid], x2[tid], &z);
-            y[tid] = z;
-            //end_vectorize
-            }
-        """
-
-        kernel_descriptions = {
-            "my_mul": xo.Kernel(
-                args=[
-                    xo.Arg(xo.Int32, name="n"),
-                    xo.Arg(xo.Float64, pointer=True, const=True, name="x1"),
-                    xo.Arg(xo.Float64, pointer=True, const=True, name="x2"),
-                    xo.Arg(xo.Float64, pointer=True, const=False, name="y"),
-                ],
-                n_threads="n",
-            ),
+    /*gpukern*/
+    void my_mul(const int n,
+        /*gpuglmem*/ const double* x1,
+        /*gpuglmem*/ const double* x2,
+        /*gpuglmem*/       double* y) {
+        int tid = 0 //vectorize_over tid n
+        double z;
+        myfun(x1[tid], x2[tid], &z);
+        y[tid] = z;
+        //end_vectorize
         }
+    """
 
-        ctx.add_kernels(
-            sources=[src_code],
-            kernels=kernel_descriptions,
-            save_source_as=None,
-            compile=True,
-            extra_classes=[xo.String[:]],
-        )
+    kernel_descriptions = {
+        "my_mul": xo.Kernel(
+            args=[
+                xo.Arg(xo.Int32, name="n"),
+                xo.Arg(xo.Float64, pointer=True, const=True, name="x1"),
+                xo.Arg(xo.Float64, pointer=True, const=True, name="x2"),
+                xo.Arg(xo.Float64, pointer=True, const=False, name="y"),
+            ],
+            n_threads="n",
+        ),
+    }
+
+    test_context.add_kernels(
+        sources=[src_code],
+        kernels=kernel_descriptions,
+        save_source_as=None,
+        compile=True,
+        extra_classes=[xo.String[:]],
+    )
 
 
 @for_all_test_contexts
@@ -595,7 +659,9 @@ class Cells(xo.Struct):
         ids = xo.Int64[:]
         particles = xo.Int64[:][:]
 
-    cells = Cells(ids=cell_ids, particles=particle_per_cell)
+    cells = Cells(
+        ids=cell_ids, particles=particle_per_cell, _context=test_context
+    )
 
     # Data layout (displayed as uint64):
     #
@@ -627,23 +693,24 @@ class Cells(xo.Struct):
     src = r"""
     #include "xobjects/headers/common.h"
 
-    int MAX_PARTICLES = 4;
-    int MAX_CELLS = 3;
+    static const int MAX_PARTICLES = 4;
+    static const int MAX_CELLS = 3;
 
-    GPUKERN
-    uint8_t loop_over(Cells cells, uint64_t* out_counts, uint64_t* out_vals)
+    GPUKERN void loop_over(
+        Cells cells,
+        GPUGLMEM uint64_t* out_counts,
+        GPUGLMEM uint64_t* out_vals,
+        GPUGLMEM uint8_t* success
+    )
     {
-        uint8_t success = 1;
         int64_t num_cells = Cells_len_ids(cells);
 
         for (int64_t i = 0; i < num_cells; i++) {
             int64_t id = Cells_get_ids(cells, i);
             int64_t count = Cells_len1_particles(cells, i);
 
-            printf("Cell ID: %lld\n Particles (count %lld): ", id, count);
-
             if (i >= MAX_CELLS) {
-                success = 0;
+                *success = 0;
                 continue;
             }
 
@@ -654,19 +721,23 @@ class Cells(xo.Struct):
 
             VECTORIZE_OVER(j, num_particles);
                 int64_t val = ArrNInt64_get(particles, j);
-                printf("%lld ", val);
 
                 if (j >= MAX_PARTICLES) {
-                    success = 0;
-                    continue;
+                    *success = 0;
+                } else {
+                    out_vals[i * MAX_PARTICLES + j] = val;
                 }
-
-                out_vals[i * MAX_PARTICLES + j] = val;
             END_VECTORIZE;
-            printf("\n");
         }
-        fflush(stdout);
-        return success;
+    }
+
+    GPUKERN void kernel_Cells_get_particles(
+        Cells obj,
+        int64_t i0,
+        int64_t i1,
+        GPUGLMEM int64_t* out
+    ) {
+        *out = Cells_get_particles(obj, i0, i1);
     }
     """
 
@@ -676,33 +747,46 @@ class Cells(xo.Struct):
                 xo.Arg(Cells, name="cells"),
                 xo.Arg(xo.UInt64, pointer=True, name="out_counts"),
                 xo.Arg(xo.UInt64, pointer=True, name="out_vals"),
+                xo.Arg(xo.UInt8, pointer=True, name="success"),
             ],
-            n_threads="n",
-            ret=xo.Arg(xo.UInt8),
-        )
+            n_threads=4,
+        ),
+        "kernel_Cells_get_particles": xo.Kernel(
+            args=[
+                xo.Arg(Cells, name="obj"),
+                xo.Arg(xo.Int64, name="i0"),
+                xo.Arg(xo.Int64, name="i1"),
+                xo.Arg(xo.Int64, pointer=True, name="out"),
+            ],
+        ),
     }
-    kernels.update(Cells._gen_kernels())
 
     test_context.add_kernels(
         sources=[src],
         kernels=kernels,
     )
 
-    counts = np.zeros(len(cell_ids), dtype=np.uint64)
-    vals = np.zeros(12, dtype=np.uint64)
+    counts = test_context.zeros(len(cell_ids), dtype=np.uint64)
+    vals = test_context.zeros(12, dtype=np.uint64)
+    success = test_context.zeros((1,), dtype=np.uint8) + 1
 
     for i, _ in enumerate(particle_per_cell):
         for j, expected in enumerate(particle_per_cell[i]):
-            result = test_context.kernels.Cells_get_particles(
-                obj=cells, i0=i, i1=j
+            result = test_context.zeros(shape=(1,), dtype=np.int64)
+            test_context.kernels.kernel_Cells_get_particles(
+                obj=cells, i0=i, i1=j, out=result
             )
-            assert result == expected
+            assert result[0] == expected
 
-    ret = test_context.kernels.loop_over(
+    test_context.kernels.loop_over(
         cells=cells,
         out_counts=counts,
         out_vals=vals,
+        success=success,
     )
-    assert ret == 1
+    counts = test_context.nparray_from_context_array(counts)
+    vals = test_context.nparray_from_context_array(vals)
+
+    assert success[0] == 1
     assert np.all(counts == [2, 3, 4])
     assert np.all(vals == [1, 8, 0, 0, 9, 3, 2, 0, 4, 5, 6, 7])
@@ -16,19 +16,15 @@ def test_common_atomicadd(test_context):
     #include "xobjects/headers/common.h"
     #include "xobjects/headers/atomicadd.h"
 
-    GPUKERN
-    double test_atomic_add()
+    GPUKERN void test_atomic_add(GPUGLMEM double* out, int32_t iterations)
     {
-        int iterations = 1000;
-        double sum = 0;
         VECTORIZE_OVER(i, iterations);
             // If on CPU do some work to avoid the loop being optimized out
             #if defined(XO_CONTEXT_CPU_OPENMP)
                 usleep(10);
             #endif
-            atomicAdd(&sum, 1.0);
+            atomicAdd(out, 1.0);
         END_VECTORIZE;
-        return sum;
     }
     """
 
@@ -45,15 +41,21 @@ def test_common_atomicadd(test_context):
         sources=[src],
         kernels={
             "test_atomic_add": xo.Kernel(
-                args=[],
+                c_name="test_atomic_add",
+                args=[
+                    xo.Arg(xo.Float64, pointer=True, name="out"),
+                    xo.Arg(xo.Int32, name="iterations"),
+                ],
                 n_threads=n_threads,
-                ret=xo.Arg(xo.Float64),
             )
         },
     )
 
     expected = 1000
-    result = test_context.kernels.test_atomic_add()
+    result = np.array([0], dtype=np.float64)
+    result_ctx = test_context.nparray_to_context_array(result)
+    test_context.kernels.test_atomic_add(out=result_ctx, iterations=expected)
+    result = test_context.nparray_from_context_array(result_ctx)
 
     assert result == expected