Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ project(GPGPUTasks)

set(CMAKE_CXX_STANDARD 17)

# Если вы установили CUDA SDK - можете включить поддержку CUDA
include_directories(${CMAKE_SOURCE_DIR})

option(GPU_CUDA_SUPPORT "CUDA support." OFF)

# GLSLC_BIN используйтся для компиляции Vulkan шейдеров (GLSL)
Expand Down Expand Up @@ -64,7 +65,12 @@ set(LIBRARIES
set(OPENCL_SOURCES
src/kernels/cl/aplusb.cl
src/kernels/cl/ray_tracing_render_brute_force.cl
src/kernels/cl/ray_tracing_render_using_lbvh.cl)
src/kernels/cl/ray_tracing_render_using_lbvh.cl
src/kernels/cl/lbvh_compute_triangle_data.cl
src/kernels/cl/lbvh_compute_bounds.cl
src/kernels/cl/lbvh_compute_morton_codes.cl
src/kernels/cl/lbvh_build_nodes.cl
src/kernels/cl/lbvh_compute_aabbs.cl)

set(OPENCL_INCLUDES src/kernels/defines.h src/kernels/cl/helpers/rassert.cl src/kernels/cl/camera_helpers.cl src/kernels/cl/geometry_helpers.cl src/kernels/cl/random_helpers.cl src/kernels/shared_structs/aabb_gpu_shared.h src/kernels/shared_structs/bvh_node_gpu_shared.h src/kernels/shared_structs/camera_gpu_shared.h src/kernels/shared_structs/morton_code_gpu_shared.h src/kernels/shared_structs/struct_helpers.h)
set(OPENCL_DEFINES)
Expand Down
93 changes: 93 additions & 0 deletions src/kernels/cl/lbvh_build_nodes.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#ifdef __CLION_IDE__
#include <libgpu/opencl/cl/clion_defines.cl>
#endif

#include "helpers/rassert.cl"
#include "../defines.h"
#include "../shared_structs/bvh_node_gpu_shared.h"
#include "../shared_structs/morton_code_gpu_shared.h"

static inline int clz32(uint x) {
return x == 0 ? 32 : clz(x);
}

static inline int delta(
__global const MortonCode* codes,
int n, int i, int j)
{
if (j < 0 || j >= n) return -1;

MortonCode ci = codes[i];
MortonCode cj = codes[j];

if (ci == cj) {
return 32 + clz32((uint)i ^ (uint)j);
} else {
return clz32(ci ^ cj);
}
}

__attribute__((reqd_work_group_size(256, 1, 1)))
__kernel void lbvh_build_nodes(
__global const MortonCode* sorted_morton_codes,
__global const uint* sorted_indices,
__global BVHNodeGPU* nodes,
__global uint* leaf_triangle_indices,
uint nfaces)
{
uint i = get_global_id(0);
int n = (int)nfaces;

if (i >= nfaces) return;

if (i < nfaces - 1) {
int d_left = delta(sorted_morton_codes, n, i, i-1);
int d_right = delta(sorted_morton_codes, n, i, i+1);

int d = (d_right > d_left) ? 1 : -1;

int delta_min = delta(sorted_morton_codes, n, i, i - d);
int lmax = 2;
while (delta(sorted_morton_codes, n, i, i + lmax * d) > delta_min) {
lmax *= 2;
}

int l = 0;
for (int t = lmax / 2; t > 0; t /= 2) {
if (delta(sorted_morton_codes, n, i, i + (l + t) * d) > delta_min) {
l += t;
}
}

int j = i + l * d;
int first = min((int)i, j);
int last = max((int)i, j);

int common_prefix = delta(sorted_morton_codes, n, first, last);
int split = first;
int step = last - first;

do {
step = (step + 1) >> 1;
int new_split = split + step;
if (new_split < last) {
int split_prefix = delta(sorted_morton_codes, n, first, new_split);
if (split_prefix > common_prefix) {
split = new_split;
}
}
} while (step > 1);

int leaf_start = (int)nfaces - 1;
int left_child = (split == first) ? (leaf_start + split) : split;
int right_child = (split + 1 == last) ? (leaf_start + split + 1) : (split + 1);

nodes[i].leftChildIndex = left_child;
nodes[i].rightChildIndex = right_child;
}

int leaf_idx = (int)nfaces - 1 + i;
leaf_triangle_indices[i] = sorted_indices[i];
nodes[leaf_idx].leftChildIndex = 0xFFFFFFFFu;
nodes[leaf_idx].rightChildIndex = 0xFFFFFFFFu;
}
68 changes: 68 additions & 0 deletions src/kernels/cl/lbvh_compute_aabbs.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#ifdef __CLION_IDE__
#include <libgpu/opencl/cl/clion_defines.cl>
#endif

#include "helpers/rassert.cl"
#include "../defines.h"
#include "../shared_structs/bvh_node_gpu_shared.h"
#include "../shared_structs/aabb_gpu_shared.h"

static inline AABBGPU merge_aabb(AABBGPU a, AABBGPU b) {
AABBGPU result;
result.min_x = fmin(a.min_x, b.min_x);
result.min_y = fmin(a.min_y, b.min_y);
result.min_z = fmin(a.min_z, b.min_z);
result.max_x = fmax(a.max_x, b.max_x);
result.max_y = fmax(a.max_y, b.max_y);
result.max_z = fmax(a.max_z, b.max_z);
return result;
}

__attribute__((reqd_work_group_size(256, 1, 1)))
__kernel void lbvh_compute_aabbs(
__global BVHNodeGPU* nodes,
__global const AABBGPU* triangle_aabbs,
__global const uint* leaf_triangle_indices,
__global volatile int* node_visit_counts,
uint nfaces)
{
uint i = get_global_id(0);
if (i >= nfaces) return;

int leaf_idx = (int)nfaces - 1 + i;
nodes[leaf_idx].aabb = triangle_aabbs[leaf_triangle_indices[i]];

int current = -1;
for (int p = 0; p < (int)nfaces - 1; p++) {
if (nodes[p].leftChildIndex == (uint)leaf_idx || nodes[p].rightChildIndex == (uint)leaf_idx) {
current = p;
break;
}
}

while (current >= 0) {
int visit_count = atomic_inc(&node_visit_counts[current]);

if (visit_count == 0) {
return;
}

BVHNodeGPU node = nodes[current];
AABBGPU left_aabb = nodes[node.leftChildIndex].aabb;
AABBGPU right_aabb = nodes[node.rightChildIndex].aabb;
nodes[current].aabb = merge_aabb(left_aabb, right_aabb);

if (current == 0) break;

int parent_idx = -1;
for (int p = 0; p < (int)nfaces - 1; p++) {
if (nodes[p].leftChildIndex == (uint)current || nodes[p].rightChildIndex == (uint)current) {
parent_idx = p;
break;
}
}

current = parent_idx;
if (current < 0) break;
}
}
55 changes: 55 additions & 0 deletions src/kernels/cl/lbvh_compute_bounds.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#ifdef __CLION_IDE__
#include <libgpu/opencl/cl/clion_defines.cl>
#endif

#include "helpers/rassert.cl"
#include "../defines.h"

__attribute__((reqd_work_group_size(256, 1, 1)))
__kernel void lbvh_compute_bounds(
__global const float* triangle_centroids,
__global float* bounds_min,
__global float* bounds_max,
uint nfaces)
{
__local float local_min[3 * 256];
__local float local_max[3 * 256];

uint local_id = get_local_id(0);
uint global_id = get_global_id(0);
uint group_id = get_group_id(0);

float3 local_min_val = (float3)(FLT_MAX, FLT_MAX, FLT_MAX);
float3 local_max_val = (float3)(-FLT_MAX, -FLT_MAX, -FLT_MAX);

for (uint i = global_id; i < nfaces; i += get_global_size(0)) {
float3 c = (float3)(triangle_centroids[3*i], triangle_centroids[3*i+1], triangle_centroids[3*i+2]);
local_min_val = fmin(local_min_val, c);
local_max_val = fmax(local_max_val, c);
}

local_min[local_id * 3] = local_min_val.x;
local_min[local_id * 3 + 1] = local_min_val.y;
local_min[local_id * 3 + 2] = local_min_val.z;
local_max[local_id * 3] = local_max_val.x;
local_max[local_id * 3 + 1] = local_max_val.y;
local_max[local_id * 3 + 2] = local_max_val.z;
barrier(CLK_LOCAL_MEM_FENCE);

for (uint stride = 128; stride > 0; stride >>= 1) {
if (local_id < stride) {
for (int axis = 0; axis < 3; axis++) {
local_min[local_id * 3 + axis] = fmin(local_min[local_id * 3 + axis], local_min[(local_id + stride) * 3 + axis]);
local_max[local_id * 3 + axis] = fmax(local_max[local_id * 3 + axis], local_max[(local_id + stride) * 3 + axis]);
}
}
barrier(CLK_LOCAL_MEM_FENCE);
}

if (local_id == 0) {
for (int axis = 0; axis < 3; axis++) {
atomic_min((volatile __global int*)&bounds_min[axis], as_int(local_min[axis]));
atomic_max((volatile __global int*)&bounds_max[axis], as_int(local_max[axis]));
}
}
}
52 changes: 52 additions & 0 deletions src/kernels/cl/lbvh_compute_morton_codes.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#ifdef __CLION_IDE__
#include <libgpu/opencl/cl/clion_defines.cl>
#endif

#include "helpers/rassert.cl"
#include "../defines.h"
#include "../shared_structs/morton_code_gpu_shared.h"

static inline uint expandBits(uint v) {
v = (v * 0x00010001u) & 0xFF0000FFu;
v = (v * 0x00000101u) & 0x0F00F00Fu;
v = (v * 0x00000011u) & 0xC30C30C3u;
v = (v * 0x00000005u) & 0x49249249u;
return v;
}

static inline MortonCode morton3D(float x, float y, float z) {
uint ix = min(max((int)(x * 1024.0f), 0), 1023);
uint iy = min(max((int)(y * 1024.0f), 0), 1023);
uint iz = min(max((int)(z * 1024.0f), 0), 1023);

uint xx = expandBits(ix);
uint yy = expandBits(iy);
uint zz = expandBits(iz);

return (xx << 2) | (yy << 1) | zz;
}

__attribute__((reqd_work_group_size(256, 1, 1)))
__kernel void lbvh_compute_morton_codes(
__global const float* triangle_centroids,
__global MortonCode* morton_codes,
__global uint* sorted_indices,
float centroid_min_x, float centroid_min_y, float centroid_min_z,
float centroid_max_x, float centroid_max_y, float centroid_max_z,
uint nfaces)
{
uint i = get_global_id(0);
if (i >= nfaces) return;

float3 c = (float3)(triangle_centroids[3*i], triangle_centroids[3*i+1], triangle_centroids[3*i+2]);
float3 centroid_min = (float3)(centroid_min_x, centroid_min_y, centroid_min_z);
float3 centroid_max = (float3)(centroid_max_x, centroid_max_y, centroid_max_z);

float eps = 1e-9f;
float3 d = fmax(centroid_max - centroid_min, eps);
float3 n = (c - centroid_min) / d;
n = clamp(n, 0.0f, 1.0f);

morton_codes[i] = morton3D(n.x, n.y, n.z);
sorted_indices[i] = i;
}
39 changes: 39 additions & 0 deletions src/kernels/cl/lbvh_compute_triangle_data.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#ifdef __CLION_IDE__
#include <libgpu/opencl/cl/clion_defines.cl>
#endif

#include "helpers/rassert.cl"
#include "../defines.h"
#include "../shared_structs/aabb_gpu_shared.h"

__attribute__((reqd_work_group_size(256, 1, 1)))
__kernel void lbvh_compute_triangle_data(
__global const float* vertices,
__global const uint* faces,
__global AABBGPU* triangle_aabbs,
__global float* triangle_centroids,
uint nfaces)
{
uint i = get_global_id(0);
if (i >= nfaces) return;

uint3 f = (uint3)(faces[3*i], faces[3*i+1], faces[3*i+2]);
float3 v0 = (float3)(vertices[3*f.x], vertices[3*f.x+1], vertices[3*f.x+2]);
float3 v1 = (float3)(vertices[3*f.y], vertices[3*f.y+1], vertices[3*f.y+2]);
float3 v2 = (float3)(vertices[3*f.z], vertices[3*f.z+1], vertices[3*f.z+2]);

AABBGPU aabb;
aabb.min_x = fmin(fmin(v0.x, v1.x), v2.x);
aabb.min_y = fmin(fmin(v0.y, v1.y), v2.y);
aabb.min_z = fmin(fmin(v0.z, v1.z), v2.z);
aabb.max_x = fmax(fmax(v0.x, v1.x), v2.x);
aabb.max_y = fmax(fmax(v0.y, v1.y), v2.y);
aabb.max_z = fmax(fmax(v0.z, v1.z), v2.z);

triangle_aabbs[i] = aabb;

float3 centroid = (v0 + v1 + v2) / 3.0f;
triangle_centroids[3*i] = centroid.x;
triangle_centroids[3*i+1] = centroid.y;
triangle_centroids[3*i+2] = centroid.z;
}
Loading