-
Notifications
You must be signed in to change notification settings - Fork 62
/
Copy pathunified.cu
63 lines (48 loc) · 1.46 KB
/
unified.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#include <cuda_runtime.h>
#include <iostream>
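// Disabled reference kernel (kept for comparison): despite the name addKernel,
// it multiplies the two input arrays element-wise (c[idx] = a[idx] * b[idx]).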
// __global__ void addKernel(int *a, int *b, int *c, int size) {
// int idx = threadIdx.x + blockIdx.x * blockDim.x;
// if (idx < size) {
// c[idx] = a[idx] * b[idx];
// }
// }
// Scales an integer array by 100: c[i] = a[i] * 100 for every i in [0, size).
//
// Parameters:
//   a    - input array, at least `size` ints, readable from device code
//   c    - output array, at least `size` ints, writable from device code
//   size - number of elements to process; size <= 0 writes nothing
//
// Launch layout: 1D grid of 1D blocks. The loop below advances each thread by
// the total number of launched threads, so every element is covered no matter
// how many blocks/threads are launched (a <<<1, 1>>> launch is valid and just
// runs serially). No shared memory is used.
__global__ void mulKernel(int *a, int *c, int size) {
    // Do the index math in 64 bits so blockIdx.x * blockDim.x cannot wrap and
    // the running index cannot overflow past `size` on very large inputs.
    const long long stride = static_cast<long long>(gridDim.x) * blockDim.x;
    long long idx = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;

    for (; idx < size; idx += stride) {
        c[idx] = a[idx] * 100;
    }
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool checkCuda(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
        return true;
    }
    std::cerr << "CUDA error (" << what << "): " << cudaGetErrorString(status)
              << std::endl;
    return false;
}

// Demo driver: fills a 10-element managed array with 0..9, runs mulKernel on
// it, and prints the inputs and the results.
// Returns 0 on success and 1 if any CUDA runtime call or the kernel fails.
int main() {
    // Define array size
    const int size = 10;
    const size_t bytes = size * sizeof(int);

    // Unified memory allocation: both buffers are touched by the host loops
    // below and passed to the kernel.
    int *a = nullptr;
    int *c = nullptr;
    if (!checkCuda(cudaMallocManaged(&c, bytes), "cudaMallocManaged(c)")) {
        return 1;
    }
    if (!checkCuda(cudaMallocManaged(&a, bytes), "cudaMallocManaged(a)")) {
        cudaFree(c);  // do not leak the first buffer on the error path
        return 1;
    }

    // Initialize the input on the CPU
    for (int i = 0; i < size; ++i) {
        a[i] = i;
    }

    // Define kernel launch parameters (ceil-div so the grid covers `size`)
    const int threadsPerBlock = 256;
    const int blocks = (size + threadsPerBlock - 1) / threadsPerBlock;

    std::cout << "launching kernel..." << std::endl;
    for (int i = 0; i < size; ++i) {
        std::cout << "a[" << i << "] = " << a[i] << "\n";
    }

    // Launch the kernel
    mulKernel<<<blocks, threadsPerBlock>>>(a, c, size);

    // A launch returns no status itself: bad launch configurations surface via
    // cudaGetLastError(), and faults during execution surface at the next
    // synchronizing call. The host must also wait here before reading `c`.
    bool ok = checkCuda(cudaGetLastError(), "mulKernel launch");
    ok = ok && checkCuda(cudaDeviceSynchronize(), "cudaDeviceSynchronize");

    // Display results only when the kernel actually completed
    if (ok) {
        std::cout << "Results:\n";
        for (int i = 0; i < size; ++i) {
            std::cout << "a[" << i << "] * 100 = " << c[i] << "\n";
        }
    }

    // Free unified memory (always attempted, even after a kernel failure)
    ok = checkCuda(cudaFree(a), "cudaFree(a)") && ok;
    ok = checkCuda(cudaFree(c), "cudaFree(c)") && ok;

    return ok ? 0 : 1;
}