|
11 | 11 | import cffi |
12 | 12 |
|
13 | 13 | import xobjects as xo |
14 | | - |
| 14 | +from xobjects.test_helpers import for_all_test_contexts |
15 | 15 |
|
16 | 16 | ffi = cffi.FFI() |
17 | 17 |
|
@@ -158,6 +158,70 @@ def test_array_dynamic_type_init_get_set(array_cls, example_shape): |
158 | 158 | assert arr[ii].field1[idx_in_field] == 13 * vv |
159 | 159 |
|
160 | 160 |
|
| 161 | +@for_all_test_contexts |
| 162 | +@pytest.mark.parametrize( |
| 163 | + "array_type", |
| 164 | + [ |
| 165 | + xo.UInt64[3, 5, 7], |
| 166 | + xo.UInt64[:, :, :], |
| 167 | + xo.UInt64[:, 5, :], |
| 168 | + ], |
| 169 | +) |
| 170 | +def test_array_get_shape(test_context, array_type): |
| 171 | + source = """ |
| 172 | + #include "xobjects/headers/common.h" |
| 173 | +
|
| 174 | + GPUKERN void get_nd_and_shape( |
| 175 | + ARRAY_TYPE arr, |
| 176 | + GPUGLMEM int64_t* out_nd, |
| 177 | + GPUGLMEM int64_t* out_shape |
| 178 | + ) { |
| 179 | + *out_nd = ARRAY_TYPE_nd(arr); |
| 180 | + ARRAY_TYPE_shape(arr, out_shape); |
| 181 | + } |
| 182 | + """.replace( |
| 183 | + "ARRAY_TYPE", array_type.__name__ |
| 184 | + ) |
| 185 | + |
| 186 | + kernels = { |
| 187 | + "get_nd_and_shape": xo.Kernel( |
| 188 | + c_name="get_nd_and_shape", |
| 189 | + args=[ |
| 190 | + xo.Arg(array_type, name="arr"), |
| 191 | + xo.Arg(xo.Int64, pointer=True, name="out_nd"), |
| 192 | + xo.Arg(xo.Int64, pointer=True, name="out_shape"), |
| 193 | + ], |
| 194 | + ), |
| 195 | + } |
| 196 | + |
| 197 | + test_context.add_kernels( |
| 198 | + sources=[source], |
| 199 | + kernels=kernels, |
| 200 | + ) |
| 201 | + |
| 202 | + instance = array_type( |
| 203 | + np.array(range(3 * 5 * 7)).reshape((3, 5, 7)), |
| 204 | + _context=test_context, |
| 205 | + ) |
| 206 | + |
| 207 | + expected_nd = 3 |
| 208 | + result_nd = test_context.zeros((1,), dtype=np.int64) |
| 209 | + |
| 210 | + expected_shape = [3, 5, 7] |
| 211 | + result_shape = test_context.zeros((expected_nd,), dtype=np.int64) |
| 212 | + |
| 213 | + test_context.kernels.get_nd_and_shape( |
| 214 | + arr=instance, |
| 215 | + out_nd=result_nd, |
| 216 | + out_shape=result_shape, |
| 217 | + ) |
| 218 | + |
| 219 | + assert result_nd[0] == expected_nd |
| 220 | + assert result_shape[0] == expected_shape[0] |
| 221 | + assert result_shape[1] == expected_shape[1] |
| 222 | + assert result_shape[2] == expected_shape[2] |
| 223 | + |
| 224 | + |
161 | 225 | def test_struct1(): |
162 | 226 | kernels = Struct1._gen_kernels() |
163 | 227 | ctx = xo.ContextCpu() |
@@ -539,45 +603,190 @@ def test_getp1_dyn_length_dyn_type_string_array(): |
539 | 603 | assert ord(ffi.cast("char *", s2)[8 + ii]) == ch |
540 | 604 |
|
541 | 605 |
|
542 | | -def test_gpu_api(): |
543 | | - for ctx in xo.context.get_test_contexts(): |
544 | | - src_code = """ |
545 | | - /*gpufun*/ |
546 | | - void myfun(double x, double y, |
547 | | - double* z){ |
548 | | - z[0] = x * y; |
549 | | - } |
| 606 | +@for_all_test_contexts |
| 607 | +def test_gpu_api(test_context): |
| 608 | + src_code = """ |
| 609 | + /*gpufun*/ |
| 610 | + void myfun(double x, double y, |
| 611 | + double* z){ |
| 612 | + z[0] = x * y; |
| 613 | + } |
550 | 614 |
|
551 | | - /*gpukern*/ |
552 | | - void my_mul(const int n, |
553 | | - /*gpuglmem*/ const double* x1, |
554 | | - /*gpuglmem*/ const double* x2, |
555 | | - /*gpuglmem*/ double* y) { |
556 | | - int tid = 0 //vectorize_over tid n |
557 | | - double z; |
558 | | - myfun(x1[tid], x2[tid], &z); |
559 | | - y[tid] = z; |
560 | | - //end_vectorize |
| 615 | + /*gpukern*/ |
| 616 | + void my_mul(const int n, |
| 617 | + /*gpuglmem*/ const double* x1, |
| 618 | + /*gpuglmem*/ const double* x2, |
| 619 | + /*gpuglmem*/ double* y) { |
| 620 | + int tid = 0 //vectorize_over tid n |
| 621 | + double z; |
| 622 | + myfun(x1[tid], x2[tid], &z); |
| 623 | + y[tid] = z; |
| 624 | + //end_vectorize |
| 625 | + } |
| 626 | + """ |
| 627 | + |
| 628 | + kernel_descriptions = { |
| 629 | + "my_mul": xo.Kernel( |
| 630 | + args=[ |
| 631 | + xo.Arg(xo.Int32, name="n"), |
| 632 | + xo.Arg(xo.Float64, pointer=True, const=True, name="x1"), |
| 633 | + xo.Arg(xo.Float64, pointer=True, const=True, name="x2"), |
| 634 | + xo.Arg(xo.Float64, pointer=True, const=False, name="y"), |
| 635 | + ], |
| 636 | + n_threads="n", |
| 637 | + ), |
| 638 | + } |
| 639 | + |
| 640 | + test_context.add_kernels( |
| 641 | + sources=[src_code], |
| 642 | + kernels=kernel_descriptions, |
| 643 | + save_source_as=None, |
| 644 | + compile=True, |
| 645 | + extra_classes=[xo.String[:]], |
| 646 | + ) |
| 647 | + |
| 648 | + |
| 649 | +@for_all_test_contexts |
| 650 | +def test_array_of_arrays(test_context): |
| 651 | + cell_ids = [3, 5, 7] |
| 652 | + particle_per_cell = [ |
| 653 | + [1, 8], |
| 654 | + [9, 3, 2], |
| 655 | + [4, 5, 6, 7], |
| 656 | + ] |
| 657 | + |
| 658 | + class Cells(xo.Struct): |
| 659 | + ids = xo.Int64[:] |
| 660 | + particles = xo.Int64[:][:] |
| 661 | + |
| 662 | + cells = Cells( |
| 663 | + ids=cell_ids, particles=particle_per_cell, _context=test_context |
| 664 | + ) |
| 665 | + |
| 666 | + # Data layout (displayed as uint64): |
| 667 | + # |
| 668 | + # [0] 216 (cells size) |
| 669 | + # [8] 56 (offset field 2 -- particles field) |
| 670 | + # [16] cell_ids data: |
| 671 | + # [0] 40 (cell_ids size) |
| 672 | + # [8] 3 (cell_ids length) |
| 673 | + # [16] {3, 5, 7} (cell_ids elements) |
| 674 | + # [56] particles data: |
| 675 | + # [0] 160 (particles size) |
| 676 | + # [8] 3 (particles length) |
| 677 | + # [16] 40 (offset particles[0]) |
| 678 | + # [24] 72 (offset particles[1]) |
| 679 | + # [32] 112 (offset particles[2]) |
| 680 | + # [40] particles[0] data: |
| 681 | + # [0] 32 (particles[0] size) |
| 682 | + # [8] 2 (particles[0] length) |
| 683 | + # [16] {1, 8} (particles[0] elements) |
| 684 | + # [72] particles[1] data: |
| 685 | + # [0] 40 (particles[1] size) |
| 686 | + # [8] 3 (particles[1] length) |
| 687 | + # [16] {9, 3, 2} (particles[1] elements) |
| 688 | + # [112] particles[2] data: |
| 689 | + # [0] 48 (particles[2] size) |
| 690 | + # [8] 4 (particles[2] length) |
| 691 | + # [16] {4, 5, 6, 7} (particles[2] elements) |
| 692 | + |
| 693 | + src = r""" |
| 694 | + #include "xobjects/headers/common.h" |
| 695 | +
|
| 696 | + static const int MAX_PARTICLES = 4; |
| 697 | + static const int MAX_CELLS = 3; |
| 698 | +
|
| 699 | + GPUKERN void loop_over( |
| 700 | + Cells cells, |
| 701 | + GPUGLMEM uint64_t* out_counts, |
| 702 | + GPUGLMEM uint64_t* out_vals, |
| 703 | + GPUGLMEM uint8_t* success |
| 704 | + ) |
| 705 | + { |
| 706 | + int64_t num_cells = Cells_len_ids(cells); |
| 707 | +
|
| 708 | + for (int64_t i = 0; i < num_cells; i++) { |
| 709 | + int64_t id = Cells_get_ids(cells, i); |
| 710 | + int64_t count = Cells_len1_particles(cells, i); |
| 711 | +
|
| 712 | + if (i >= MAX_CELLS) { |
| 713 | + *success = 0; |
| 714 | + continue; |
561 | 715 | } |
562 | | - """ |
563 | | - |
564 | | - kernel_descriptions = { |
565 | | - "my_mul": xo.Kernel( |
566 | | - args=[ |
567 | | - xo.Arg(xo.Int32, name="n"), |
568 | | - xo.Arg(xo.Float64, pointer=True, const=True, name="x1"), |
569 | | - xo.Arg(xo.Float64, pointer=True, const=True, name="x2"), |
570 | | - xo.Arg(xo.Float64, pointer=True, const=False, name="y"), |
571 | | - ], |
572 | | - n_threads="n", |
573 | | - ), |
| 716 | +
|
| 717 | + out_counts[i] = count; |
| 718 | +
|
| 719 | + ArrNInt64 particles = Cells_getp1_particles(cells, i); |
| 720 | + uint32_t num_particles = ArrNInt64_len(particles); |
| 721 | +
|
| 722 | + VECTORIZE_OVER(j, num_particles); |
| 723 | + int64_t val = ArrNInt64_get(particles, j); |
| 724 | +
|
| 725 | + if (j >= MAX_PARTICLES) { |
| 726 | + *success = 0; |
| 727 | + } else { |
| 728 | + out_vals[i * MAX_PARTICLES + j] = val; |
| 729 | + } |
| 730 | + END_VECTORIZE; |
574 | 731 | } |
| 732 | + } |
| 733 | +
|
| 734 | + GPUKERN void kernel_Cells_get_particles( |
| 735 | + Cells obj, |
| 736 | + int64_t i0, |
| 737 | + int64_t i1, |
| 738 | + GPUGLMEM int64_t* out |
| 739 | + ) { |
| 740 | + *out = Cells_get_particles(obj, i0, i1); |
| 741 | + } |
| 742 | + """ |
| 743 | + |
| 744 | + kernels = { |
| 745 | + "loop_over": xo.Kernel( |
| 746 | + args=[ |
| 747 | + xo.Arg(Cells, name="cells"), |
| 748 | + xo.Arg(xo.UInt64, pointer=True, name="out_counts"), |
| 749 | + xo.Arg(xo.UInt64, pointer=True, name="out_vals"), |
| 750 | + xo.Arg(xo.UInt8, pointer=True, name="success"), |
| 751 | + ], |
| 752 | + n_threads=4, |
| 753 | + ), |
| 754 | + "kernel_Cells_get_particles": xo.Kernel( |
| 755 | + args=[ |
| 756 | + xo.Arg(Cells, name="obj"), |
| 757 | + xo.Arg(xo.Int64, name="i0"), |
| 758 | + xo.Arg(xo.Int64, name="i1"), |
| 759 | + xo.Arg(xo.Int64, pointer=True, name="out"), |
| 760 | + ], |
| 761 | + ), |
| 762 | + } |
| 763 | + |
| 764 | + test_context.add_kernels( |
| 765 | + sources=[src], |
| 766 | + kernels=kernels, |
| 767 | + ) |
575 | 768 |
|
576 | | - ctx.add_kernels( |
577 | | - sources=[src_code], |
578 | | - kernels=kernel_descriptions, |
579 | | - # save_src_as=f'_test_{name}.c') |
580 | | - save_source_as=None, |
581 | | - compile=True, |
582 | | - extra_classes=[xo.String[:]], |
583 | | - ) |
| 769 | + counts = test_context.zeros(len(cell_ids), dtype=np.uint64) |
| 770 | + vals = test_context.zeros(12, dtype=np.uint64) |
| 771 | + success = test_context.zeros((1,), dtype=np.uint8) + 1 |
| 772 | + |
| 773 | + for i, _ in enumerate(particle_per_cell): |
| 774 | + for j, expected in enumerate(particle_per_cell[i]): |
| 775 | + result = test_context.zeros(shape=(1,), dtype=np.int64) |
| 776 | + test_context.kernels.kernel_Cells_get_particles( |
| 777 | + obj=cells, i0=i, i1=j, out=result |
| 778 | + ) |
| 779 | + assert result[0] == expected |
| 780 | + |
| 781 | + test_context.kernels.loop_over( |
| 782 | + cells=cells, |
| 783 | + out_counts=counts, |
| 784 | + out_vals=vals, |
| 785 | + success=success, |
| 786 | + ) |
| 787 | + counts = test_context.nparray_from_context_array(counts) |
| 788 | + vals = test_context.nparray_from_context_array(vals) |
| 789 | + |
| 790 | + assert success[0] == 1 |
| 791 | + assert np.all(counts == [2, 3, 4]) |
| 792 | + assert np.all(vals == [1, 8, 0, 0, 9, 3, 2, 0, 4, 5, 6, 7]) |
0 commit comments