Skip to content

Commit a88c336

Browse files
author
swfly
committed
Add "extended_limits" flag support for Metal: acceleration structures with massive instance counts (>2^24) need this flag to trace properly.
1 parent 184814a commit a88c336

8 files changed

Lines changed: 59 additions & 23 deletions

File tree

include/luisa/runtime/rhi/resource.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,11 @@ struct ShaderOption {
155155
uint32_t max_registers{0};
156156
/// \brief Whether to measure time spent on each compilation phase.
157157
bool time_trace{false};
158+
/// \brief Whether to enable extended acceleration structure limits.
159+
/// \details If set to true, the shader will be compiled with support for
160+
/// massive instance counts (>2^24) in acceleration structures. Only has
161+
/// an effect on the Metal backend; other backends ignore this option.
162+
bool enable_extended_accel_limits{false};
158163
/// \brief A user-defined name for the shader.
159164
/// \details If provided, the shader will be read from or written to disk
160165
/// via the `BinaryIO` object (passed to backends on device creation)
@@ -272,10 +277,12 @@ struct hash<compute::ShaderOption> {
272277
constexpr auto enable_fast_math_shift = 1u;
273278
constexpr auto enable_debug_info_shift = 2u;
274279
constexpr auto compile_only_shift = 3u;
280+
constexpr auto enable_extended_accel_limits_shift = 4u;
275281
auto opt_hash = hash_value((static_cast<uint>(option.enable_cache) << enable_cache_shift) |
276282
(static_cast<uint>(option.enable_fast_math) << enable_fast_math_shift) |
277283
(static_cast<uint>(option.enable_debug_info) << enable_debug_info_shift) |
278-
(static_cast<uint>(option.compile_only) << compile_only_shift),
284+
(static_cast<uint>(option.compile_only) << compile_only_shift) |
285+
(static_cast<uint>(option.enable_extended_accel_limits) << enable_extended_accel_limits_shift),
279286
seed);
280287
auto name_hash = hash_value(option.name, seed);
281288
return hash_combine({opt_hash, name_hash}, seed);

include/luisa/rust/api_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,7 @@ typedef struct LCShaderOption {
521521
bool enable_debug_info;
522522
bool compile_only;
523523
bool time_trace;
524+
bool enable_extended_accel_limits;
524525
uint32_t max_registers;
525526
const char *name;
526527
const char *native_include;

src/api/runtime.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,7 @@ luisa_compute_shader_create(LCDevice device, LCKernelModule m, const LCShaderOpt
434434
.compile_only = option.compile_only,
435435
.max_registers = option.max_registers,
436436
.time_trace = option.time_trace,
437+
.enable_extended_accel_limits = option.enable_extended_accel_limits,
437438
.name = luisa::string{option.name},
438439
.native_include = luisa::string{option.native_include},
439440
};

src/backends/metal/metal_accel.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77

88
namespace luisa::compute::metal {
99

10+
namespace {
11+
constexpr auto metal_accel_default_instance_limit = 1u << 24u;
12+
}// namespace
13+
1014
MetalAccel::MetalAccel(MetalDevice *device, const AccelOption &option) noexcept
1115
: _update{device->builtin_update_accel_instances()},
1216
_option{option} { _resources.reserve(reserved_primitive_count); }
@@ -25,6 +29,7 @@ void MetalAccel::build(MetalCommandEncoder &encoder, AccelBuildCommand *command)
2529

2630
auto device = encoder.device();
2731
auto instance_count = command->instance_count();
32+
auto requires_extended_limits = instance_count > metal_accel_default_instance_limit;
2833
LUISA_ASSERT(instance_count > 0u, "Empty acceleration structure is not allowed.");
2934
if (auto size = instance_count * sizeof(MTL::AccelerationStructureInstanceDescriptor);
3035
_instance_buffer == nullptr || _instance_buffer->length() < size) {
@@ -91,8 +96,10 @@ void MetalAccel::build(MetalCommandEncoder &encoder, AccelBuildCommand *command)
9196
_requires_rebuild = _requires_rebuild /* pending rebuild */ ||
9297
_descriptor == nullptr || old_instance_count != instance_count /* instance count has changed */ ||
9398
_handle == nullptr /* not built before */ ||
99+
_requires_extended_limits != requires_extended_limits /* extended limits mode has changed */ ||
94100
!_option.allow_update /* accel cannot be refitted */ ||
95101
command->request() == AccelBuildRequest::FORCE_BUILD /* rebuild is forced */;
102+
_requires_extended_limits = requires_extended_limits;
96103

97104
// prepare the descriptor
98105
if (_requires_rebuild) {
@@ -114,6 +121,9 @@ void MetalAccel::build(MetalCommandEncoder &encoder, AccelBuildCommand *command)
114121
break;
115122
}
116123
if (_option.allow_update) { usage |= MTL::AccelerationStructureUsageRefit; }
124+
if (_requires_extended_limits) {
125+
usage |= MTL::AccelerationStructureUsageExtendedLimits;
126+
}
117127
_descriptor->setUsage(usage);
118128

119129
// update the descriptor

src/backends/metal/metal_accel.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ class MetalAccel {
2828
NS::String *_name{nullptr};
2929
AccelOption _option;
3030
bool _requires_rebuild{true};
31+
bool _requires_extended_limits{false};
3132
spin_mutex _mutex;
3233

3334
public:
@@ -48,11 +49,11 @@ class MetalAccel {
4849
[[nodiscard]] auto instance_buffer() const noexcept { return _instance_buffer; }
4950
[[nodiscard]] auto binding() const noexcept { return Binding{_handle->gpuResourceID(), _instance_buffer->gpuAddress()}; }
5051
[[nodiscard]] auto pointer_to_handle() const noexcept { return const_cast<void *>(static_cast<const void *>(&_handle)); }
52+
[[nodiscard]] auto requires_extended_limits() const noexcept { return _requires_extended_limits; }
5153
void set_name(luisa::string_view name) noexcept;
5254
void mark_resource_usages(MetalCommandEncoder &encoder,
5355
MTL::ComputeCommandEncoder *command_encoder,
5456
MTL::ResourceUsage usage) noexcept;
5557
};
5658

5759
}// namespace luisa::compute::metal
58-

src/backends/metal/metal_builtin/metal_device_lib.metal

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@ using namespace metal;
88
#define lc_assume(...) __builtin_assume(__VA_ARGS__)
99
#define lc_assert(...)// TODO: implement assert?
1010

11+
#ifdef LUISA_ENABLE_EXTENDED_LIMITS
12+
#define LC_EXTENDED_LIMITS_TAG , extended_limits
13+
#define LC_INSTANCE_ACCELERATION_STRUCTURE instance_acceleration_structure<extended_limits>
14+
#else
15+
#define LC_EXTENDED_LIMITS_TAG
16+
#define LC_INSTANCE_ACCELERATION_STRUCTURE instance_acceleration_structure
17+
#endif
18+
1119
template<typename Ptr>
1220
[[nodiscard]] inline auto lc_address_of_impl(Ptr ptr) {
1321
return reinterpret_cast<ulong>(ptr);
@@ -768,19 +776,19 @@ static_assert(sizeof(LCInstance) == 64u, "");
768776

769777
struct LCAccel {
770778
#ifdef LUISA_ENABLE_MOTION_BLUR
771-
acceleration_structure<instancing, primitive_motion, instance_motion> handle;
779+
acceleration_structure<instancing, primitive_motion, instance_motion LC_EXTENDED_LIMITS_TAG> handle;
772780
#else
773-
instance_acceleration_structure handle;
781+
LC_INSTANCE_ACCELERATION_STRUCTURE handle;
774782
#endif
775783
device LCInstance *__restrict__ instances;
776784
};
777785

778786
[[nodiscard]] inline auto lc_intersector_base() {
779787
#ifdef LUISA_ENABLE_CURVE
780788
#ifdef LUISA_ENABLE_MOTION_BLUR
781-
intersector<triangle_data, curve_data, instancing, primitive_motion, instance_motion> i;
789+
intersector<triangle_data, curve_data, instancing, primitive_motion, instance_motion LC_EXTENDED_LIMITS_TAG> i;
782790
#else
783-
intersector<triangle_data, curve_data, instancing> i;
791+
intersector<triangle_data, curve_data, instancing LC_EXTENDED_LIMITS_TAG> i;
784792
#endif
785793
i.assume_geometry_type(geometry_type::triangle | geometry_type::curve);
786794
i.assume_curve_type(curve_type::round);
@@ -807,9 +815,9 @@ struct LCAccel {
807815
#endif
808816
#else
809817
#ifdef LUISA_ENABLE_MOTION_BLUR
810-
intersector<triangle_data, instancing, primitive_motion, instance_motion> i;
818+
intersector<triangle_data, instancing, primitive_motion, instance_motion LC_EXTENDED_LIMITS_TAG> i;
811819
#else
812-
intersector<triangle_data, instancing> i;
820+
intersector<triangle_data, instancing LC_EXTENDED_LIMITS_TAG> i;
813821
#endif
814822
i.assume_geometry_type(geometry_type::triangle);
815823
#endif
@@ -889,25 +897,25 @@ struct LCAccel {
889897

890898
struct LCRayQuery {
891899
#ifdef LUISA_ENABLE_MOTION_BLUR
892-
acceleration_structure<instancing, primitive_motion, instance_motion> accel;
900+
acceleration_structure<instancing, primitive_motion, instance_motion LC_EXTENDED_LIMITS_TAG> accel;
893901
#else
894-
instance_acceleration_structure accel;
902+
LC_INSTANCE_ACCELERATION_STRUCTURE accel;
895903
#endif
896904
ray ray;
897905
uint mask;
898906
float time;
899907
bool terminate_on_first_hit;
900908
#ifdef LUISA_ENABLE_CURVE
901909
#ifdef LUISA_ENABLE_MOTION_BLUR
902-
thread intersection_query<triangle_data, curve_data, instancing, primitive_motion, instance_motion> *i;
910+
thread intersection_query<triangle_data, curve_data, instancing, primitive_motion, instance_motion LC_EXTENDED_LIMITS_TAG> *i;
903911
#else
904-
thread intersection_query<triangle_data, curve_data, instancing> *i;
912+
thread intersection_query<triangle_data, curve_data, instancing LC_EXTENDED_LIMITS_TAG> *i;
905913
#endif
906914
#else
907915
#ifdef LUISA_ENABLE_MOTION_BLUR
908-
thread intersection_query<triangle_data, instancing, primitive_motion, instance_motion> *i;
916+
thread intersection_query<triangle_data, instancing, primitive_motion, instance_motion LC_EXTENDED_LIMITS_TAG> *i;
909917
#else
910-
thread intersection_query<triangle_data, instancing> *i;
918+
thread intersection_query<triangle_data, instancing LC_EXTENDED_LIMITS_TAG> *i;
911919
#endif
912920
#endif
913921
};
@@ -941,15 +949,15 @@ struct LCRayQuery {
941949
void ray_query_init(thread LCRayQuery &q,
942950
#ifdef LUISA_ENABLE_CURVE
943951
#ifdef LUISA_ENABLE_MOTION_BLUR
944-
thread intersection_query<triangle_data, curve_data, instancing, primitive_motion, instance_motion> &i,
952+
thread intersection_query<triangle_data, curve_data, instancing, primitive_motion, instance_motion LC_EXTENDED_LIMITS_TAG> &i,
945953
#else
946-
thread intersection_query<triangle_data, curve_data, instancing> &i,
954+
thread intersection_query<triangle_data, curve_data, instancing LC_EXTENDED_LIMITS_TAG> &i,
947955
#endif
948956
#else
949957
#ifdef LUISA_ENABLE_MOTION_BLUR
950-
thread intersection_query<triangle_data, instancing, primitive_motion, instance_motion> &i,
958+
thread intersection_query<triangle_data, instancing, primitive_motion, instance_motion LC_EXTENDED_LIMITS_TAG> &i,
951959
#else
952-
thread intersection_query<triangle_data, instancing> &i,
960+
thread intersection_query<triangle_data, instancing LC_EXTENDED_LIMITS_TAG> &i,
953961
#endif
954962
#endif
955963
bool has_procedural_branch) {
@@ -993,18 +1001,18 @@ void ray_query_init(thread LCRayQuery &q,
9931001
#ifdef LUISA_ENABLE_CURVE
9941002
#ifdef LUISA_ENABLE_MOTION_BLUR
9951003
#define LC_RAY_QUERY_SHADOW_VARIABLE(q) \
996-
intersection_query<triangle_data, curve_data, instancing, primitive_motion, instance_motion> q##_i
1004+
intersection_query<triangle_data, curve_data, instancing, primitive_motion, instance_motion LC_EXTENDED_LIMITS_TAG> q##_i
9971005
#else
9981006
#define LC_RAY_QUERY_SHADOW_VARIABLE(q) \
999-
intersection_query<triangle_data, curve_data, instancing> q##_i
1007+
intersection_query<triangle_data, curve_data, instancing LC_EXTENDED_LIMITS_TAG> q##_i
10001008
#endif
10011009
#else
10021010
#ifdef LUISA_ENABLE_MOTION_BLUR
10031011
#define LC_RAY_QUERY_SHADOW_VARIABLE(q) \
1004-
intersection_query<triangle_data, instancing, primitive_motion, instance_motion> q##_i
1012+
intersection_query<triangle_data, instancing, primitive_motion, instance_motion LC_EXTENDED_LIMITS_TAG> q##_i
10051013
#else
10061014
#define LC_RAY_QUERY_SHADOW_VARIABLE(q) \
1007-
intersection_query<triangle_data, instancing> q##_i
1015+
intersection_query<triangle_data, instancing LC_EXTENDED_LIMITS_TAG> q##_i
10081016
#endif
10091017
#endif
10101018

src/backends/metal/metal_device.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,13 @@ ShaderCreationInfo MetalDevice::create_shader(const ShaderOption &option, Functi
459459
}
460460

461461
// create shader
462-
auto pipeline = _compiler->compile(scratch.string_view(), option, metadata);
462+
luisa::string source;
463+
if (option.enable_extended_accel_limits) {
464+
source = luisa::format("#define LUISA_ENABLE_EXTENDED_LIMITS\n{}", scratch.string_view());
465+
} else {
466+
source = luisa::string{scratch.string_view()};
467+
}
468+
auto pipeline = _compiler->compile(source, option, metadata);
463469
auto shader = luisa::new_with_allocator<MetalShader>(
464470
this, std::move(pipeline),
465471
std::move(metadata.argument_usages),

src/rust/luisa_compute_api_types/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ pub struct ShaderOption {
7878
pub enable_debug_info: bool,
7979
pub compile_only: bool,
8080
pub time_trace: bool,
81+
pub enable_extended_accel_limits: bool,
8182
pub max_registers: u32,
8283
pub name: *const std::ffi::c_char,
8384
pub native_include: *const std::ffi::c_char,
@@ -95,6 +96,7 @@ impl Default for ShaderOption {
9596
enable_debug_info: false,
9697
compile_only: false,
9798
time_trace: false,
99+
enable_extended_accel_limits: false,
98100
max_registers: 0,
99101
name: std::ptr::null(),
100102
native_include: std::ptr::null(),

0 commit comments

Comments
 (0)