-
Notifications
You must be signed in to change notification settings - Fork 2.6k
[BUG FIX] Fix BVH build's radix sort. #1305
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 30 commits
90da431
0864103
8d07248
6d9f319
197453b
bf0ee25
d385da6
fe432f0
b1a7d2d
82ffb7e
0d564a2
1241c5b
bd5aa0f
122e221
2778529
54d5646
e74be79
bf60281
604aef5
3845104
5cc72a0
07e2112
2c24e59
d7889fd
593892b
d33f8dc
624eed4
98d30ab
5fad9b7
253dc10
f04500c
86b6b2a
4e70712
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,7 @@ | ||
| import genesis as gs | ||
| import taichi as ti | ||
| from genesis.repr_base import RBC | ||
| import numpy as np | ||
|
|
||
|
|
||
| @ti.data_oriented | ||
|
|
@@ -157,19 +158,27 @@ class Node: | |
| # Nodes of the BVH, first n_aabbs - 1 are internal nodes, last n_aabbs are leaf nodes | ||
| self.nodes = self.Node.field(shape=(self.n_batches, self.n_aabbs * 2 - 1)) | ||
| # Whether an internal node has been visited during traversal | ||
| self.internal_node_visited = ti.field(ti.u8, shape=(self.n_batches, self.n_aabbs - 1)) | ||
| self.internal_node_active = ti.field(ti.u1, shape=(self.n_batches, self.n_aabbs - 1)) | ||
| self.internal_node_ready = ti.field(ti.u1, shape=(self.n_batches, self.n_aabbs - 1)) | ||
| self.updated = ti.field(ti.u1, shape=()) | ||
|
|
||
| # Query results, vec3 of batch id, self id, query id | ||
| self.query_result = ti.field(gs.ti_ivec3, shape=(self.max_n_query_results)) | ||
| # Count of query results | ||
| self.query_result_count = ti.field(ti.i32, shape=()) | ||
|
|
||
| @ti.kernel | ||
| def build(self): | ||
| """ | ||
| Build the BVH from the axis-aligned bounding boxes (AABBs). | ||
| """ | ||
| self.compute_aabb_centers_and_scales() | ||
| self.compute_morton_codes() | ||
| self.radix_sort_morton_codes() | ||
| self.build_radix_tree() | ||
| self.compute_bounds() | ||
|
|
||
| @ti.kernel | ||
| def compute_aabb_centers_and_scales(self): | ||
| for i_b, i_a in ti.ndrange(self.n_batches, self.n_aabbs): | ||
| self.aabb_centers[i_b, i_a] = (self.aabbs[i_b, i_a].min + self.aabbs[i_b, i_a].max) / 2 | ||
|
|
||
|
|
@@ -186,12 +195,7 @@ def build(self): | |
| for i in ti.static(range(3)): | ||
| self.scale[i_b][i] = ti.select(scale[i] > 1e-7, 1.0 / scale[i], 1) | ||
|
|
||
| self.compute_morton_codes() | ||
| self.radix_sort_morton_codes() | ||
| self.build_radix_tree() | ||
| self.compute_bounds() | ||
|
|
||
| @ti.func | ||
| @ti.kernel | ||
| def compute_morton_codes(self): | ||
| """ | ||
| Compute the Morton codes for each AABB. | ||
|
|
@@ -223,38 +227,43 @@ def expand_bits(self, v): | |
| v = (v * ti.u32(0x00000005)) & ti.u32(0x49249249) | ||
| return v | ||
|
|
||
| @ti.func | ||
| def radix_sort_morton_codes(self): | ||
| """ | ||
| Radix sort the morton codes, using 8 bits at a time. | ||
| """ | ||
| for i in ti.static(range(8)): | ||
| # Clear histogram | ||
| for i_b, j in ti.ndrange(self.n_batches, 256): | ||
| self.hist[i_b, j] = 0 | ||
| for i in range(8): | ||
| self._kernel_radix_sort_morton_codes_one_round(i) | ||
|
|
||
| # Fill histogram | ||
| for i_b, i_a in ti.ndrange(self.n_batches, self.n_aabbs): | ||
| @ti.kernel | ||
| def _kernel_radix_sort_morton_codes_one_round(self, i: int): | ||
| # Clear histogram | ||
| self.hist.fill(0) | ||
|
|
||
| # Fill histogram | ||
| for i_b in range(self.n_batches): | ||
| # This is now sequential | ||
| # TODO Parallelize, need to use groups to handle data to remain stable, could be not worth it | ||
| for i_a in range(self.n_aabbs): | ||
| code = (self.morton_codes[i_b, i_a] >> (i * 8)) & 0xFF | ||
| self.offset[i_b, i_a] = ti.atomic_add(self.hist[i_b, ti.i32(code)], 1) | ||
|
|
||
| # Compute prefix sum | ||
| for i_b in ti.ndrange(self.n_batches): | ||
| self.prefix_sum[i_b, 0] = 0 | ||
| for j in range(1, 256): # sequential prefix sum | ||
| self.prefix_sum[i_b, j] = self.prefix_sum[i_b, j - 1] + self.hist[i_b, j - 1] | ||
| # Compute prefix sum | ||
| for i_b in ti.ndrange(self.n_batches): | ||
| self.prefix_sum[i_b, 0] = 0 | ||
| for j in range(1, 256): # sequential prefix sum | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Though it has been merged, just to mention there is a built-in parallel prefix sum, which might be useful: https://github.com/taichi-dev/taichi/blob/master/python/taichi/algorithms/_algorithms.py#L42
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oh really! This is interesting!
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks for pointing this out. I think if we are doing a prefix sum for just 256 elements, sequential would be fine. |
||
| self.prefix_sum[i_b, j] = self.prefix_sum[i_b, j - 1] + self.hist[i_b, j - 1] | ||
|
|
||
| # Reorder morton codes | ||
| for i_b, i_a in ti.ndrange(self.n_batches, self.n_aabbs): | ||
| code = (self.morton_codes[i_b, i_a] >> (i * 8)) & 0xFF | ||
| idx = ti.i32(self.offset[i_b, i_a] + self.prefix_sum[i_b, ti.i32(code)]) | ||
| self.tmp_morton_codes[i_b, idx] = self.morton_codes[i_b, i_a] | ||
| # Reorder morton codes | ||
| for i_b, i_a in ti.ndrange(self.n_batches, self.n_aabbs): | ||
| code = (self.morton_codes[i_b, i_a] >> (i * 8)) & 0xFF | ||
duburcqa marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| idx = ti.i32(self.offset[i_b, i_a] + self.prefix_sum[i_b, ti.i32(code)]) | ||
| self.tmp_morton_codes[i_b, idx] = self.morton_codes[i_b, i_a] | ||
|
|
||
| # Swap the temporary and original morton codes | ||
| for i_b, i_a in ti.ndrange(self.n_batches, self.n_aabbs): | ||
| self.morton_codes[i_b, i_a] = self.tmp_morton_codes[i_b, i_a] | ||
| # Swap the temporary and original morton codes | ||
| for i_b, i_a in ti.ndrange(self.n_batches, self.n_aabbs): | ||
| self.morton_codes[i_b, i_a] = self.tmp_morton_codes[i_b, i_a] | ||
|
|
||
| @ti.func | ||
| @ti.kernel | ||
| def build_radix_tree(self): | ||
| """ | ||
| Build the radix tree from the sorted morton codes. | ||
|
|
@@ -321,31 +330,51 @@ def delta(self, i, j, i_b): | |
| break | ||
| return result | ||
|
|
||
| @ti.func | ||
| def compute_bounds(self): | ||
| """ | ||
| Compute the bounds of the BVH nodes. | ||
|
|
||
| Starts from the leaf nodes and works upwards. | ||
| Starts from the leaf nodes and works upwards layer by layer. | ||
| """ | ||
| for i_b, i in ti.ndrange(self.n_batches, self.n_aabbs - 1): | ||
| self.internal_node_visited[i_b, i] = ti.u8(0) | ||
| self._kernel_compute_bounds_init() | ||
| while self.updated[None]: | ||
| self._kernel_compute_bounds_one_layer() | ||
|
|
||
| @ti.kernel | ||
| def _kernel_compute_bounds_init(self): | ||
| self.updated[None] = True | ||
| self.internal_node_active.fill(0) | ||
| self.internal_node_ready.fill(0) | ||
|
|
||
| for i_b, i in ti.ndrange(self.n_batches, self.n_aabbs): | ||
| idx = ti.i32(self.morton_codes[i_b, i]) | ||
| self.nodes[i_b, i + self.n_aabbs - 1].bound.min = self.aabbs[i_b, idx].min | ||
| self.nodes[i_b, i + self.n_aabbs - 1].bound.max = self.aabbs[i_b, idx].max | ||
| parent_idx = self.nodes[i_b, i + self.n_aabbs - 1].parent | ||
| if parent_idx != -1: | ||
| self.internal_node_active[i_b, parent_idx] = 1 | ||
|
|
||
| cur_idx = self.nodes[i_b, i + self.n_aabbs - 1].parent | ||
| while cur_idx != -1: | ||
| visited = ti.u1(ti.atomic_or(self.internal_node_visited[i_b, cur_idx], ti.u8(1))) | ||
| if not visited: | ||
| break | ||
| left_bound = self.nodes[i_b, self.nodes[i_b, cur_idx].left].bound | ||
| right_bound = self.nodes[i_b, self.nodes[i_b, cur_idx].right].bound | ||
| self.nodes[i_b, cur_idx].bound.min = ti.min(left_bound.min, right_bound.min) | ||
| self.nodes[i_b, cur_idx].bound.max = ti.max(left_bound.max, right_bound.max) | ||
| cur_idx = self.nodes[i_b, cur_idx].parent | ||
| @ti.kernel | ||
| def _kernel_compute_bounds_one_layer(self): | ||
| self.updated[None] = False | ||
| for i_b, i in ti.ndrange(self.n_batches, self.n_aabbs - 1): | ||
| if self.internal_node_active[i_b, i] == 0: | ||
| continue | ||
| left_bound = self.nodes[i_b, self.nodes[i_b, i].left].bound | ||
| right_bound = self.nodes[i_b, self.nodes[i_b, i].right].bound | ||
| self.nodes[i_b, i].bound.min = ti.min(left_bound.min, right_bound.min) | ||
| self.nodes[i_b, i].bound.max = ti.max(left_bound.max, right_bound.max) | ||
| parent_idx = self.nodes[i_b, i].parent | ||
| if parent_idx != -1: | ||
| self.internal_node_ready[i_b, parent_idx] = 1 | ||
| self.internal_node_active[i_b, i] = 0 | ||
| self.updated[None] = True | ||
|
|
||
| for i_b, i in ti.ndrange(self.n_batches, self.n_aabbs - 1): | ||
| if self.internal_node_ready[i_b, i] == 0: | ||
| continue | ||
| self.internal_node_active[i_b, i] = 1 | ||
| self.internal_node_ready[i_b, i] = 0 | ||
|
|
||
| @ti.kernel | ||
| def query(self, aabbs: ti.template()): | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.