Skip to content

Commit 0d40036

Browse files
authored
Merge pull request #27 from GPUOpen-LibrariesAndSDKs/next-release-5
Next release 5
2 parents b9e664d + 92e7d4c commit 0d40036

11 files changed

+279
-212
lines changed

CMakeLists.txt

+121-17
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ option(NO_ENCRYPT "Don't encrypt kernel source and binaries" OFF)
1616
option(NO_UNITTEST "Don't build unit tests" OFF)
1717
option(HIPRT_PREFER_HIP_5 "Prefer HIP 5" OFF)
1818

19+
option(FORCE_DISABLE_CUDA "By default Cuda support is automatically added if a Cuda install is detected. Turn this flag to ON to force Cuda to be disabled." OFF)
1920

2021

2122
find_program(PYTHON_EXECUTABLE
@@ -186,6 +187,18 @@ function(get_hip_sdk_version result result_path)
186187

187188

188189
endif()
190+
191+
# build hip command for Linux
192+
else()
193+
194+
# If not defined we try to take it from the PATH
195+
if(NOT HIP_PATH)
196+
set(hipCommand "hipcc")
197+
198+
# otherwise, build the hipcc command with full path.
199+
else()
200+
set(hipCommand "${HIP_PATH}/bin/${hipCommand}")
201+
endif()
189202
endif()
190203

191204

@@ -296,9 +309,12 @@ add_definitions(-D__USE_HIP__)
296309
add_definitions(-DHIPRT_PUBLIC_REPO)
297310

298311

299-
# Enable CUDA if possible
300-
include(${CMAKE_CURRENT_SOURCE_DIR}/contrib/Orochi/Orochi/enable_cuew.cmake)
301-
312+
if (NOT FORCE_DISABLE_CUDA)
313+
# Enable CUDA if possible
314+
include(${CMAKE_CURRENT_SOURCE_DIR}/contrib/Orochi/Orochi/enable_cuew.cmake)
315+
else()
316+
message(STATUS "CUDA support is forced to disabled.")
317+
endif()
302318

303319

304320
# Base output directory
@@ -361,20 +377,93 @@ if(HIPRT_PREFER_HIP_5)
361377
endif()
362378

363379

380+
381+
# files generated by compile.py and precompile_bitcode.py
382+
if(WIN32)
383+
set(KERNEL_OS_POSTFIX "win")
384+
else()
385+
set(KERNEL_OS_POSTFIX "linux")
386+
endif()
387+
set(KERNEL_HIPRT_COMP "${BASE_OUTPUT_DIR}/${CMAKE_BUILD_TYPE}/hiprt${version_str_}_${HIP_VERSION_STR}_amd.hipfb") # example: hiprt02005_6.2_amd.hipfb
388+
set(KERNEL_UNITTEST_COMP "${BASE_OUTPUT_DIR}/${CMAKE_BUILD_TYPE}/hiprt${version_str_}_${HIP_VERSION_STR}_precompiled_bitcode_${KERNEL_OS_POSTFIX}.hipfb") # example: hiprt02005_6.2_precompiled_bitcode_win.hipfb
389+
set(KERNEL_OROCHI_COMP "${BASE_OUTPUT_DIR}/${CMAKE_BUILD_TYPE}/oro_compiled_kernels.hipfb")
390+
391+
364392
# precompile kernels:
365393
if(PRECOMPILE)
366-
message(">> Execute: ${PYTHON_EXECUTABLE} compile.py --nvidia --hipSdkPath \"${HIP_FINAL_PATH}\"")
367-
execute_process(
368-
COMMAND ${PYTHON_EXECUTABLE} compile.py --nvidia --hipSdkPath ${HIP_FINAL_PATH}
369-
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/scripts/bitcodes
394+
395+
if(FORCE_DISABLE_CUDA)
396+
set(CUDA_OPTION "")
397+
else()
398+
set(CUDA_OPTION "--nvidia")
399+
endif()
400+
401+
402+
set(bvh_source
403+
${CMAKE_SOURCE_DIR}/hiprt/hiprt_vec.h
404+
${CMAKE_SOURCE_DIR}/hiprt/hiprt_math.h
405+
${CMAKE_SOURCE_DIR}/hiprt/impl/Aabb.h
406+
${CMAKE_SOURCE_DIR}/hiprt/impl/AabbList.h
407+
${CMAKE_SOURCE_DIR}/hiprt/impl/BvhCommon.h
408+
${CMAKE_SOURCE_DIR}/hiprt/impl/BvhNode.h
409+
${CMAKE_SOURCE_DIR}/hiprt/impl/Geometry.h
410+
${CMAKE_SOURCE_DIR}/hiprt/impl/QrDecomposition.h
411+
${CMAKE_SOURCE_DIR}/hiprt/impl/Quaternion.h
412+
${CMAKE_SOURCE_DIR}/hiprt/impl/Transform.h
413+
${CMAKE_SOURCE_DIR}/hiprt/impl/Instance.h
414+
${CMAKE_SOURCE_DIR}/hiprt/impl/InstanceList.h
415+
${CMAKE_SOURCE_DIR}/hiprt/impl/MortonCode.h
416+
${CMAKE_SOURCE_DIR}/hiprt/impl/Scene.h
417+
${CMAKE_SOURCE_DIR}/hiprt/impl/TriangleMesh.h
418+
${CMAKE_SOURCE_DIR}/hiprt/impl/Triangle.h
419+
${CMAKE_SOURCE_DIR}/hiprt/impl/BvhBuilderUtil.h
420+
${CMAKE_SOURCE_DIR}/hiprt/impl/SbvhCommon.h
421+
${CMAKE_SOURCE_DIR}/hiprt/impl/ApiNodeList.h
422+
${CMAKE_SOURCE_DIR}/hiprt/impl/BvhConfig.h
423+
${CMAKE_SOURCE_DIR}/hiprt/impl/MemoryArena.h
424+
${CMAKE_SOURCE_DIR}/hiprt/hiprt_types.h
425+
${CMAKE_SOURCE_DIR}/hiprt/hiprt_common.h
426+
)
427+
428+
message(">> add_custom_command: ${PYTHON_EXECUTABLE} compile.py ${CUDA_OPTION} --hipSdkPath \"${HIP_FINAL_PATH}\"")
429+
add_custom_command(
430+
OUTPUT ${KERNEL_HIPRT_COMP} ${KERNEL_OROCHI_COMP}
431+
COMMAND ${PYTHON_EXECUTABLE} compile.py ${CUDA_OPTION} --hipSdkPath ${HIP_FINAL_PATH}
432+
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/scripts/bitcodes
433+
COMMENT "Precompiling kernels via compile.py"
434+
VERBATIM
435+
DEPENDS ${bvh_source}
436+
)
437+
438+
# create the 'precompile_kernels' project
439+
add_custom_target(precompile_kernels ALL
440+
DEPENDS ${KERNEL_HIPRT_COMP} ${KERNEL_OROCHI_COMP}
370441
)
371442

372443
if(NOT NO_UNITTEST)
373-
message(">> Execute: ${PYTHON_EXECUTABLE} precompile_bitcode.py --nvidia --hipSdkPath \"${HIP_FINAL_PATH}\"")
374-
execute_process(
375-
COMMAND ${PYTHON_EXECUTABLE} precompile_bitcode.py --nvidia --hipSdkPath ${HIP_FINAL_PATH}
444+
445+
set(unittest_kernel_source
446+
${CMAKE_SOURCE_DIR}/test/bitcodes/custom_func_table.cpp
447+
${CMAKE_SOURCE_DIR}/test/bitcodes/unit_test.cpp
448+
)
449+
450+
message(">> add_custom_command: ${PYTHON_EXECUTABLE} precompile_bitcode.py ${CUDA_OPTION} --hipSdkPath \"${HIP_FINAL_PATH}\"")
451+
add_custom_command(
452+
OUTPUT ${KERNEL_UNITTEST_COMP}
453+
COMMAND ${PYTHON_EXECUTABLE} precompile_bitcode.py ${CUDA_OPTION} --hipSdkPath ${HIP_FINAL_PATH}
454+
DEPENDS ${KERNEL_HIPRT_COMP} # Ensure compile.py has already run.
376455
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/scripts/bitcodes
456+
COMMENT "Precompiling unit tests kernels via precompile_bitcode.py"
457+
VERBATIM
458+
DEPENDS ${unittest_kernel_source}
377459
)
460+
461+
# create the 'precompile_unittest_kernels' project
462+
add_custom_target(precompile_unittest_kernels ALL
463+
DEPENDS ${KERNEL_UNITTEST_COMP}
464+
)
465+
466+
add_dependencies(${HIPRT_NAME} precompile_unittest_kernels)
378467
endif()
379468

380469

@@ -386,23 +475,38 @@ endif()
386475
# it's expected the step 'PRECOMPILE' has been executed.
387476
if ( BAKE_COMPILED_KERNEL )
388477

389-
message(">> Generate embedded precompiled")
478+
message(">> precompiled will be embedded.")
390479

391480
set(PYTHON_FILE "${CMAKE_CURRENT_SOURCE_DIR}/contrib/Orochi/scripts/convert_binary_to_array.py")
392481

393-
set(KERNEL_HIPRT_COMP "${BASE_OUTPUT_DIR}/${CMAKE_BUILD_TYPE}/hiprt${version_str_}_${HIP_VERSION_STR}_amd.hipfb")
482+
# HIPRT binary
394483
set(KERNEL_HIPRT_H "${CMAKE_CURRENT_SOURCE_DIR}/hiprt/impl/bvh_build_array.h")
395-
execute_process(
484+
add_custom_command(
485+
OUTPUT ${KERNEL_HIPRT_H}
396486
COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_FILE} ${KERNEL_HIPRT_COMP} ${KERNEL_HIPRT_H}
487+
DEPENDS ${KERNEL_HIPRT_COMP} # Ensure compile.py has already run.
397488
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
398-
)
489+
COMMENT "Converting HIPRT compiled kernel to header"
490+
VERBATIM
491+
)
399492

400-
set(KERNEL_OROCHI_COMP "${BASE_OUTPUT_DIR}/${CMAKE_BUILD_TYPE}/oro_compiled_kernels.hipfb")
493+
# Orochi binary
401494
set(KERNEL_OROCHI_H "${CMAKE_CURRENT_SOURCE_DIR}/contrib/Orochi/ParallelPrimitives/cache/oro_compiled_kernels.h")
402-
execute_process(
495+
add_custom_command(
496+
OUTPUT ${KERNEL_OROCHI_H}
403497
COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_FILE} ${KERNEL_OROCHI_COMP} ${KERNEL_OROCHI_H}
498+
DEPENDS ${KERNEL_OROCHI_COMP} # Ensure compile.py has already run.
404499
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
405-
)
500+
COMMENT "Converting Orochi compiled kernel to header"
501+
VERBATIM
502+
)
503+
504+
# Create the 'bake_compiled_kernels' project
505+
add_custom_target(bake_compiled_kernels ALL
506+
DEPENDS ${KERNEL_HIPRT_H} ${KERNEL_OROCHI_H}
507+
)
508+
509+
add_dependencies(${HIPRT_NAME} precompile_kernels bake_compiled_kernels)
406510

407511
endif()
408512

hiprt/impl/BvhBuilderKernels.h

+70-46
Original file line numberDiff line numberDiff line change
@@ -202,8 +202,6 @@ SingletonConstruction( uint32_t index, PrimitiveContainer& primitives, BoxNode*
202202
leafType = InstanceType;
203203
}
204204

205-
primNodes[0].m_parentAddr = 0;
206-
207205
BoxNode root;
208206
root.m_box0 = primitives.fetchAabb( 0 );
209207
root.m_box1.reset();
@@ -415,58 +413,78 @@ extern "C" __global__ void ComputeMortonCodes_InstanceList_MatrixFrame(
415413
ComputeMortonCodes<InstanceList<MatrixFrame>>( primitives, centroidBox, mortonCodeKeys, mortonCodeValues );
416414
}
417415

418-
extern "C" __global__ void ResetCounters( uint32_t primCount, BoxNode* boxNodes )
416+
template <typename PrimitiveContainer, typename PrimitiveNode, typename Header>
417+
__device__ void ResetCountersAndUpdateLeaves(
418+
const Header* header, PrimitiveContainer& primitives, BoxNode* boxNodes, PrimitiveNode* primNodes )
419419
{
420420
const uint32_t index = threadIdx.x + blockIdx.x * blockDim.x;
421-
if ( index < primCount ) boxNodes[index].m_updateCounter = 0;
421+
422+
if ( index < header->m_boxNodeCount ) boxNodes[index].m_updateCounter = 0;
423+
424+
if constexpr ( is_same<PrimitiveNode, TriangleNode>::value )
425+
{
426+
if ( index < header->m_primNodeCount )
427+
{
428+
primNodes[index] = primitives.fetchTriangleNode( { primNodes[index].m_primIndex0, primNodes[index].m_primIndex1 } );
429+
}
430+
}
431+
else if constexpr ( is_same<PrimitiveNode, InstanceNode>::value )
432+
{
433+
if ( index < primitives.getFrameCount() ) primitives.convertFrame( index );
434+
435+
if ( index < header->m_primNodeCount )
436+
{
437+
const uint32_t primIndex = primNodes[index].m_primIndex;
438+
hiprtTransformHeader transform = primitives.fetchTransformHeader( primIndex );
439+
primNodes[index].m_mask = primitives.fetchMask( primIndex );
440+
if ( transform.frameCount == 1 )
441+
primNodes[index].m_identity =
442+
primitives.copyInvTransformMatrix( transform.frameIndex, primNodes[index].m_matrix ) ? 1 : 0;
443+
}
444+
}
422445
}
423446

424-
template <typename InstanceList>
425-
__device__ void ResetCountersAndUpdateFrames( InstanceList& instanceList, BoxNode* boxNodes )
447+
extern "C" __global__ void ResetCountersAndUpdateLeaves_TriangleMesh_TriangleNode(
448+
const GeomHeader* header, TriangleMesh primitives, BoxNode* boxNodes, TriangleNode* primNodes )
426449
{
427-
const uint32_t index = threadIdx.x + blockIdx.x * blockDim.x;
428-
if ( index < instanceList.getCount() ) boxNodes[index].m_updateCounter = 0;
429-
if ( index < instanceList.getFrameCount() ) instanceList.convertFrame( index );
450+
ResetCountersAndUpdateLeaves( header, primitives, boxNodes, primNodes );
430451
}
431452

432-
extern "C" __global__ void
433-
ResetCountersAndUpdateFrames_InstanceList_SRTFrame( InstanceList<SRTFrame> instanceList, BoxNode* boxNodes )
453+
extern "C" __global__ void ResetCountersAndUpdateLeaves_AabbList_CustomNode(
454+
const GeomHeader* header, AabbList primitives, BoxNode* boxNodes, CustomNode* primNodes )
434455
{
435-
ResetCountersAndUpdateFrames<InstanceList<SRTFrame>>( instanceList, boxNodes );
456+
ResetCountersAndUpdateLeaves( header, primitives, boxNodes, primNodes );
436457
}
437458

438-
extern "C" __global__ void
439-
ResetCountersAndUpdateFrames_InstanceList_MatrixFrame( InstanceList<MatrixFrame> instanceList, BoxNode* boxNodes )
459+
extern "C" __global__ void ResetCountersAndUpdateLeaves_InstanceList_MatrixFrame_InstanceNode(
460+
const SceneHeader* header, InstanceList<MatrixFrame> primitives, BoxNode* boxNodes, InstanceNode* primNodes )
440461
{
441-
ResetCountersAndUpdateFrames<InstanceList<MatrixFrame>>( instanceList, boxNodes );
462+
ResetCountersAndUpdateLeaves( header, primitives, boxNodes, primNodes );
442463
}
443464

444-
template <typename PrimitiveContainer, typename PrimitiveNode>
445-
__device__ void FitBounds( PrimitiveContainer& primitives, BoxNode* boxNodes, PrimitiveNode* primNodes )
465+
extern "C" __global__ void ResetCountersAndUpdateLeaves_InstanceList_SRTFrame_InstanceNode(
466+
const SceneHeader* header, InstanceList<SRTFrame> primitives, BoxNode* boxNodes, InstanceNode* primNodes )
467+
{
468+
ResetCountersAndUpdateLeaves( header, primitives, boxNodes, primNodes );
469+
}
470+
471+
template <typename PrimitiveContainer, typename PrimitiveNode, typename Header>
472+
__device__ void FitBounds( Header* header, PrimitiveContainer& primitives, BoxNode* boxNodes, PrimitiveNode* primNodes )
446473
{
447474
uint32_t index = threadIdx.x + blockIdx.x * blockDim.x;
448475

449-
if ( index >= primitives.getCount() ) return;
476+
if ( index >= header->m_boxNodeCount ) return;
450477

451-
uint32_t parentAddr = primNodes[index].m_parentAddr;
452-
if constexpr ( is_same<PrimitiveNode, TriangleNode>::value )
453-
{
454-
primNodes[index] =
455-
primitives.fetchTriangleNode( make_uint2( primNodes[index].m_primIndex0, primNodes[index].m_primIndex1 ) );
456-
primNodes[index].m_parentAddr = parentAddr;
457-
}
458-
else if constexpr ( is_same<PrimitiveNode, InstanceNode>::value )
478+
BoxNode node = boxNodes[index];
479+
uint32_t internalCount = 0;
480+
for ( uint32_t i = 0; i < node.m_childCount; ++i )
459481
{
460-
const uint32_t primIndex = primNodes[index].m_primIndex;
461-
hiprtTransformHeader transform = primitives.fetchTransformHeader( primIndex );
462-
primNodes[index].m_mask = primitives.fetchMask( primIndex );
463-
if ( transform.frameCount == 1 )
464-
primNodes[index].m_identity =
465-
primitives.copyInvTransformMatrix( transform.frameIndex, primNodes[index].m_matrix ) ? 1 : 0;
482+
if ( node.getChildType( i ) == BoxType ) internalCount++;
466483
}
467484

468-
index = parentAddr;
469-
while ( index != InvalidValue && atomicAdd( &boxNodes[index].m_updateCounter, 1 ) >= boxNodes[index].m_childCount - 1 )
485+
if ( internalCount > 0 ) return;
486+
487+
while ( true )
470488
{
471489
__threadfence();
472490

@@ -484,33 +502,40 @@ __device__ void FitBounds( PrimitiveContainer& primitives, BoxNode* boxNodes, Pr
484502
if ( node.m_childIndex3 != InvalidValue )
485503
node.m_box3 = getNodeBox( node.m_childIndex3, primitives, boxNodes, primNodes );
486504

487-
index = boxNodes[index].m_parentAddr;
505+
internalCount = 0;
506+
for ( uint32_t i = 0; i < node.m_childCount; ++i )
507+
{
508+
if ( node.getChildType( i ) == BoxType ) internalCount++;
509+
}
488510

489511
__threadfence();
512+
513+
if ( atomicAdd( &node.m_updateCounter, 1 ) < internalCount - 1 ) break;
490514
}
491515
}
492516

493517
extern "C" __global__ void
494-
FitBounds_TriangleMesh_TriangleNode( TriangleMesh primitives, BoxNode* boxNodes, TriangleNode* primNodes )
518+
FitBounds_TriangleMesh_TriangleNode( GeomHeader* header, TriangleMesh primitives, BoxNode* boxNodes, TriangleNode* primNodes )
495519
{
496-
FitBounds<TriangleMesh, TriangleNode>( primitives, boxNodes, primNodes );
520+
FitBounds<TriangleMesh, TriangleNode>( header, primitives, boxNodes, primNodes );
497521
}
498522

499-
extern "C" __global__ void FitBounds_AabbList_CustomNode( AabbList primitives, BoxNode* boxNodes, CustomNode* primNodes )
523+
extern "C" __global__ void
524+
FitBounds_AabbList_CustomNode( GeomHeader* header, AabbList primitives, BoxNode* boxNodes, CustomNode* primNodes )
500525
{
501-
FitBounds<AabbList, CustomNode>( primitives, boxNodes, primNodes );
526+
FitBounds<AabbList, CustomNode>( header, primitives, boxNodes, primNodes );
502527
}
503528

504-
extern "C" __global__ void
505-
FitBounds_InstanceList_SRTFrame_InstanceNode( InstanceList<SRTFrame> primitives, BoxNode* boxNodes, InstanceNode* primNodes )
529+
extern "C" __global__ void FitBounds_InstanceList_SRTFrame_InstanceNode(
530+
SceneHeader* header, InstanceList<SRTFrame> primitives, BoxNode* boxNodes, InstanceNode* primNodes )
506531
{
507-
FitBounds<InstanceList<SRTFrame>, InstanceNode>( primitives, boxNodes, primNodes );
532+
FitBounds<InstanceList<SRTFrame>, InstanceNode>( header, primitives, boxNodes, primNodes );
508533
}
509534

510535
extern "C" __global__ void FitBounds_InstanceList_MatrixFrame_InstanceNode(
511-
InstanceList<MatrixFrame> primitives, BoxNode* boxNodes, InstanceNode* primNodes )
536+
SceneHeader* header, InstanceList<MatrixFrame> primitives, BoxNode* boxNodes, InstanceNode* primNodes )
512537
{
513-
FitBounds<InstanceList<MatrixFrame>, InstanceNode>( primitives, boxNodes, primNodes );
538+
FitBounds<InstanceList<MatrixFrame>, InstanceNode>( header, primitives, boxNodes, primNodes );
514539
}
515540

516541
template <typename PrimitiveContainer, typename PrimitiveNode, typename Header>
@@ -635,8 +660,7 @@ __device__ void Collapse(
635660
else
636661
primNodes[nodeAddr].m_transform = transform;
637662
}
638-
primNodes[nodeAddr].m_parentAddr = parentAddr;
639-
done = true;
663+
done = true;
640664
}
641665
}
642666

0 commit comments

Comments
 (0)