diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 88ee173fafcf3..85923c6b5a8d6 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -210,6 +210,7 @@ jobs: -DTI_WITH_VULKAN:BOOL=ON -DTI_WITH_BACKTRACE:BOOL=ON -DTI_BUILD_TESTS:BOOL=ON + -DTI_BUILD_EXAMPLES:BOOL=ON TI_WANTED_ARCHS: 'cpu,cuda,vulkan,gles' TI_DEVICE_MEMORY_GB: '1' TI_RUN_RELEASE_TESTS: '1' @@ -286,6 +287,7 @@ jobs: -DTI_WITH_VULKAN:BOOL=OFF -DTI_WITH_OPENGL:BOOL=OFF -DTI_BUILD_TESTS:BOOL=ON + -DTI_BUILD_EXAMPLES:BOOL=ON -DTI_WITH_AMDGPU:BOOL=ON steps: diff --git a/CMakeLists.txt b/CMakeLists.txt index 086e831fcb95e..8b9c4d1fd01fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -139,9 +139,9 @@ endif() option(TI_BUILD_EXAMPLES "Build the CPP examples" ON) option(TI_BUILD_RHI_EXAMPLES "Build the Unified Device API examples" OFF) -if(NOT TI_WITH_LLVM OR NOT TI_WITH_METAL) - set(TI_BUILD_EXAMPLES OFF) -endif() +# if(NOT TI_WITH_LLVM OR NOT TI_WITH_METAL) +# set(TI_BUILD_EXAMPLES OFF) +# endif() message("C++ Flags: ${CMAKE_CXX_FLAGS}") message("Build type: ${CMAKE_BUILD_TYPE}") diff --git a/cpp_examples/autograd.cpp b/cpp_examples/autograd.cpp index 21cc2a2a64d87..d13c41f136be7 100644 --- a/cpp_examples/autograd.cpp +++ b/cpp_examples/autograd.cpp @@ -157,28 +157,30 @@ int main() { auto _ = builder.get_loop_guard(loop); auto *i = builder.get_loop_index(loop); + auto dt = TypeFactory::get_instance().get_ndarray_struct_type( + get_data_type(), 1); auto *ext_a = builder.create_external_ptr( - builder.create_arg_load({0}, PrimitiveType::f32, true, 0), {i}); + builder.create_arg_load({0}, dt, true, 0, false), {i}); auto *a_grad_i = builder.create_global_load( builder.create_global_ptr(a->get_adjoint(), {i})); builder.create_global_store(ext_a, a_grad_i); auto *ext_b = builder.create_external_ptr( - builder.create_arg_load({1}, PrimitiveType::f32, true, 0), {i}); + builder.create_arg_load({1}, dt, true, 0, false), {i}); auto *b_grad_i = builder.create_global_load( builder.create_global_ptr(b->get_adjoint(), {i})); builder.create_global_store(ext_b, b_grad_i); auto *ext_c = builder.create_external_ptr( - builder.create_arg_load({2}, PrimitiveType::f32, true, 0), {i}); + builder.create_arg_load({2}, dt, true, 0, false), {i}); auto *c_i = builder.create_global_load(builder.create_global_ptr(c, {i})); builder.create_global_store(ext_c, c_i); } kernel_ext = std::make_unique(program, builder.extract_ir(), "ext"); - kernel_ext->insert_arr_param(get_data_type(), /*total_dim=*/1, {n}); - kernel_ext->insert_arr_param(get_data_type(), /*total_dim=*/1, {n}); - kernel_ext->insert_arr_param(get_data_type(), /*total_dim=*/1, {n}); + kernel_ext->insert_ndarray_param(get_data_type(), /*total_dim=*/1); + kernel_ext->insert_ndarray_param(get_data_type(), /*total_dim=*/1); + kernel_ext->insert_ndarray_param(get_data_type(), /*total_dim=*/1); kernel_ext->finalize_params(); } diff --git a/cpp_examples/run_snode.cpp b/cpp_examples/run_snode.cpp index 4b618a7489773..556f2eec8e0dc 100644 --- a/cpp_examples/run_snode.cpp +++ b/cpp_examples/run_snode.cpp @@ -15,7 +15,7 @@ int main() { @ti.kernel def init(): for index in range(n): - place[index] = index + place[index] = index * 2 + 1 @ti.kernel def ret() -> ti.i32: @@ -40,6 +40,10 @@ int main() { using namespace taichi; using namespace lang; auto program = Program(host_arch()); + // program.get_program_impl()->config->opt_level = 0; + // program.get_program_impl()->config->external_optimization_level = 0; + // program.get_program_impl()->config->advanced_optimization = false; + // program.get_program_impl()->config->print_ir = true; const auto &config = program.compile_config(); /*CompileConfig config_print_ir; config_print_ir.print_ir = true; @@ -47,9 +51,9 @@ int main() { int n = 10; program.materialize_runtime(); - auto *root = new SNode(0, SNodeType::root); - auto *pointer = &root->pointer(Axis(0), n); - auto *place = &pointer->insert_children(SNodeType::place); + auto root = new SNode(0, SNodeType::root); + auto pointer = &root->pointer(Axis(0), n); + auto place = &pointer->insert_children(SNodeType::place); place->dt = PrimitiveType::i32; program.add_snode_tree(std::unique_ptr(root), /*compile_only=*/false); @@ -60,17 +64,21 @@ int main() { @ti.kernel def init(): for index in range(n): - place[index] = index + place[index] = index * 2 + 1 */ IRBuilder builder; - auto *zero = builder.get_int32(0); - auto *n_stmt = builder.get_int32(n); - auto *loop = builder.create_range_for(zero, n_stmt, 0, 4); + auto zero = builder.get_int32(0); + auto n_stmt = builder.get_int32(n); + auto loop = builder.create_range_for(zero, n_stmt, 0, 4); { auto _ = builder.get_loop_guard(loop); - auto *index = builder.get_loop_index(loop); - auto *ptr = builder.create_global_ptr(place, {index}); - builder.create_global_store(ptr, index); + auto index = builder.get_loop_index(loop); + auto const_2 = builder.get_int32(2); + auto mult2 = builder.create_mul(index, const_2); + auto const_1 = builder.get_int32(1); + auto plus1 = builder.create_add(mult2, const_1); + auto ptr = builder.create_global_ptr(place, {index}); + builder.create_global_store(ptr, plus1); } kernel_init = @@ -87,19 +95,23 @@ int main() { return sum */ IRBuilder builder; - auto *sum = builder.create_local_var(PrimitiveType::i32); - auto *loop = builder.create_struct_for(pointer, 0, 4); + auto sum = builder.create_local_var(PrimitiveType::i32); + auto loop = builder.create_struct_for(pointer, 0, 4); { auto _ = builder.get_loop_guard(loop); - auto *index = builder.get_loop_index(loop); - auto *sum_old = builder.create_local_load(sum); - auto *place_index = + auto index = builder.get_loop_index(loop); + auto sum_old = builder.create_local_load(sum); + auto place_index = builder.create_global_load(builder.create_global_ptr(place, {index})); builder.create_local_store(sum, builder.create_add(sum_old, place_index)); } + // TODO: fix this (or remove) builder.create_return(builder.create_local_load(sum)); kernel_ret = std::make_unique(program, builder.extract_ir(), "ret"); + + kernel_ret->insert_ret(PrimitiveType::i32); + kernel_ret->finalize_rets(); } { @@ -111,40 +123,46 @@ int main() { # ext = place.to_numpy() */ IRBuilder builder; - auto *loop = builder.create_struct_for(pointer, 0, 4); + auto loop = builder.create_struct_for(pointer, 0, 4); { auto _ = builder.get_loop_guard(loop); - auto *index = builder.get_loop_index(loop); - auto *ext = builder.create_external_ptr( - builder.create_arg_load({0}, PrimitiveType::i32, true, 0), {index}); - auto *place_index = - builder.create_global_load(builder.create_global_ptr(place, {index})); - builder.create_global_store(ext, place_index); + auto index = builder.get_loop_index(loop); + auto dt = TypeFactory::get_instance().get_ndarray_struct_type( + get_data_type(), 1); + auto arg_load = builder.create_arg_load({0}, dt, true, 0, false); + auto ext = builder.create_external_ptr(arg_load, {index}); + auto global_ptr_place = builder.create_global_ptr(place, {index}); + auto val = builder.create_global_load(global_ptr_place); + builder.create_global_store(ext, val); } kernel_ext = std::make_unique(program, builder.extract_ir(), "ext"); - kernel_ext->insert_arr_param(get_data_type(), /*total_dim=*/1, {n}); + kernel_ext->insert_ndarray_param(get_data_type(), /*total_dim=*/1); kernel_ext->finalize_params(); } auto ctx_init = kernel_init->make_launch_context(); auto ctx_ret = kernel_ret->make_launch_context(); auto ctx_ext = kernel_ext->make_launch_context(); - std::vector ext_arr(n); - ctx_ext.set_arg_external_array_with_shape({0}, taichi::uint64(ext_arr.data()), - n, {n}); + auto ext_arr = std::make_unique(n); + ctx_ext.set_arg_external_array_with_shape({0}, (uint64)ext_arr.get(), n, {n}); + + std::cout << "running init kernel ============================" << std::endl; { const auto &compiled_kernel_data = program.compile_kernel(config, program.get_device_caps(), *kernel_init); program.launch_kernel(compiled_kernel_data, ctx_init); } + std::cout << "running ret kernel ============================" << std::endl; { const auto &compiled_kernel_data = program.compile_kernel(config, program.get_device_caps(), *kernel_ret); program.launch_kernel(compiled_kernel_data, ctx_ret); + std::cout << "after launch ret kernel" << std::endl; std::cout << program.fetch_result(0) << std::endl; } + std::cout << "running ext kernel ============================" << std::endl; { const auto &compiled_kernel_data = program.compile_kernel(config, program.get_device_caps(), *kernel_ext); diff --git a/taichi/ir/ir_builder.cpp b/taichi/ir/ir_builder.cpp index b28412cd441f4..297d68636ce63 100644 --- a/taichi/ir/ir_builder.cpp +++ b/taichi/ir/ir_builder.cpp @@ -181,9 +181,10 @@ RandStmt *IRBuilder::create_rand(DataType value_type) { ArgLoadStmt *IRBuilder::create_arg_load(const std::vector &arg_id, DataType dt, bool is_ptr, - int arg_depth) { - return insert(Stmt::make_typed(arg_id, dt, is_ptr, - /*create_load*/ true, arg_depth)); + int arg_depth, + bool create_load) { + return insert(Stmt::make_typed(arg_id, dt, is_ptr, create_load, + arg_depth)); } ReturnStmt *IRBuilder::create_return(Stmt *value) { diff --git a/taichi/ir/ir_builder.h b/taichi/ir/ir_builder.h index c585ed7be425e..1eeec11521da4 100644 --- a/taichi/ir/ir_builder.h +++ b/taichi/ir/ir_builder.h @@ -148,7 +148,8 @@ class IRBuilder { ArgLoadStmt *create_arg_load(const std::vector &arg_id, DataType dt, bool is_ptr, - int arg_depth); + int arg_depth, + bool create_load = true); // Load kernel arguments. ArgLoadStmt *create_ndarray_arg_load(const std::vector &arg_id, DataType dt, diff --git a/tests/python/test_cpp_examples.py b/tests/python/test_cpp_examples.py new file mode 100644 index 0000000000000..9e9fb42cba6f3 --- /dev/null +++ b/tests/python/test_cpp_examples.py @@ -0,0 +1,25 @@ +import pytest +import os +from os.path import join +import subprocess + +from tests import test_utils + + +def load_cpp_example_tests(): + files = [file for file in os.listdir("build") if file.startswith("cpp_examples_")] + filepaths = [join("build", file) for file in files] + return filepaths + + +@test_utils.test() +def test_exist_cpp_example_tests(): + print(os.getcwd()) + filepaths = load_cpp_example_tests() + assert len(filepaths) > 0, "No cpp examples found in build directory" + + +@pytest.mark.parametrize("filepath", load_cpp_example_tests()) +@test_utils.test() +def test_cpp_example(filepath: str) -> None: + subprocess.check_output(filepath)