-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.cu
More file actions
94 lines (78 loc) · 3.63 KB
/
main.cu
File metadata and controls
94 lines (78 loc) · 3.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#include <chrono>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <string>

#include "config.hpp"
#include "config_utils.cuh"
#include "draw.cuh"
#include "parse.hpp"
#include "libpng.h"
#include "lbvh_builder.cuh"
// Make the console stream and the line terminator usable without the std::
// qualifier in the rest of this translation unit.
using std::cout;
using std::endl;
// CUDA_CHECK(call): evaluates `call` exactly once; if the resulting
// cudaError_t is not cudaSuccess, prints the file, line and the runtime's
// error string to stderr and terminates the process with EXIT_FAILURE.
//
// The body is wrapped in do { ... } while (0) so the macro behaves like a
// single statement: `if (c) CUDA_CHECK(x); else ...` compiles as expected.
// The argument is parenthesized and the temporary carries a trailing
// underscore name so it cannot shadow a caller's own `err` variable.
#define CUDA_CHECK(call)                                                     \
    do {                                                                     \
        const cudaError_t cuda_check_err_ = (call);                          \
        if (cuda_check_err_ != cudaSuccess) {                                \
            std::cerr << "CUDA Error in " << __FILE__ << " at line "         \
                      << __LINE__ << " : "                                   \
                      << cudaGetErrorString(cuda_check_err_) << std::endl;   \
            exit(EXIT_FAILURE);                                              \
        }                                                                    \
    } while (0)
/**
 * Program entry point: parses the command line, prepares the scene on the
 * device, renders it, copies the image back and saves it to disk.
 *
 * Steps, in order:
 *   1. parseInput() fills the host StlConfig from argv.
 *   2. initRawConfigFromStl() / copyConfigDataToDevice() populate the
 *      RawConfig from it.
 *   3. build_lbvh_karas() runs when there is at least one primitive.
 *   4. The RawConfig struct and an output pixel buffer are allocated on the
 *      device, then render() is invoked and waited on.
 *   5. The pixels are copied into an Image, saved under
 *      host_stl_config.filename, and the device allocations are released.
 *
 * Each phase's wall-clock duration is printed to stdout. Any failing CUDA
 * runtime call terminates the process through CUDA_CHECK.
 *
 * @param argc  Argument count; not inspected here.
 * @param argv  Argument vector, forwarded to parseInput().
 * @return 0 when the whole pipeline completes.
 */
int main(int argc, char* argv[]) {
    using Clock = std::chrono::high_resolution_clock;

    // Prints "<label><seconds> seconds" for the interval since `since`.
    const auto report = [](const char* label, Clock::time_point since) {
        const std::chrono::duration<double> elapsed = Clock::now() - since;
        std::cout << label << elapsed.count() << " seconds" << std::endl;
    };

    // NOTE(review): only argv is handed to parseInput(); whether it copes
    // with missing arguments is not visible from this file -- confirm.
    (void)argc;

    // Parse the inputs into the host-side config.
    StlConfig host_stl_config;
    parseInput(argv, host_stl_config);

    // Build the raw config from the STL-based one and push its data to the
    // device.
    Clock::time_point phase_start = Clock::now();
    RawConfig host_raw_config;
    initRawConfigFromStl(host_stl_config, host_raw_config);
    copyConfigDataToDevice(host_stl_config, host_raw_config);
    report("Initialize raw config time: ", phase_start);
    CUDA_CHECK(cudaPeekAtLastError());

    // Build the LBVH tree; skipped for an empty scene.
    // NOTE(review): the original comment states that this call allocates and
    // fills the d_lbvh_nodes member of the config it is given; the meaning of
    // the literal 30 is defined by build_lbvh_karas() -- confirm there.
    if (host_raw_config.num_total_primitives > 0) {
        build_lbvh_karas(host_raw_config, 30);
    }
    CUDA_CHECK(cudaPeekAtLastError());

    // Size of the output image in bytes. The factors are widened to size_t
    // before multiplying so that large dimensions cannot overflow a narrower
    // integer type ahead of the sizeof() promotion.
    const std::size_t image_bytes =
        static_cast<std::size_t>(host_stl_config.width) *
        static_cast<std::size_t>(host_stl_config.height) * sizeof(pixel_t);

    // Copy the config struct itself to the device and allocate the pixel
    // buffer the renderer writes into.
    phase_start = Clock::now();
    RawConfig* d_raw_config = nullptr;
    CUDA_CHECK(cudaMalloc(&d_raw_config, sizeof(RawConfig)));
    CUDA_CHECK(cudaMemcpy(d_raw_config, &host_raw_config, sizeof(RawConfig),
                          cudaMemcpyHostToDevice));
    pixel_t* d_image = nullptr;
    CUDA_CHECK(cudaMalloc(&d_image, image_bytes));
    report("Malloc and transfer to device time: ", phase_start);

    // Raise the device stack-size limit to 64 KiB before rendering.
    CUDA_CHECK(cudaDeviceSetLimit(cudaLimitStackSize, 64 * 1024));

    phase_start = Clock::now();
    render(d_image, host_stl_config.width, host_stl_config.height,
           host_stl_config.aa, d_raw_config);
    // Launch-time failures are reported through the last-error state rather
    // than necessarily by the synchronize below, so check both.
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaDeviceSynchronize());
    report("Render time: ", phase_start);

    // Create the host image and copy the rendered pixels back into it.
    phase_start = Clock::now();
    Image img(host_stl_config.width, host_stl_config.height);
    CUDA_CHECK(cudaMemcpy(img[0], d_image, image_bytes, cudaMemcpyDeviceToHost));
    report("Transfer to host time: ", phase_start);

    const std::string output_path = host_stl_config.filename;
    img.save(output_path.c_str());

    // Release device memory.
    phase_start = Clock::now();
    CUDA_CHECK(cudaFree(d_image));
    CUDA_CHECK(cudaFree(d_raw_config));
    freeRawConfigDeviceMemory(host_raw_config);
    CUDA_CHECK(cudaPeekAtLastError());
    report("cudaFree time: ", phase_start);

    return 0;
}