-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhelper.h
88 lines (73 loc) · 2.14 KB
/
helper.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#include <cstdlib> // header for rand()
#include <ctime> // header for time-seed for rand()
#include <limits> // get the smallest increment for a datatype
#include <cublas_v2.h>
using namespace std;
#define CEIL_DIV(M, N) ((M) + (N)-1) / (N)
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
void intializeMatrix(float *A, int size){
for(int i = 0; i < size; i++){
// A[i] = static_cast<float> (rand()) / static_cast<float> (RAND_MAX);
A[i] = (float)i;
}
}
void *fixed_cudaMalloc(size_t len)
{
void *p;
if (cudaMalloc(&p, len) == cudaSuccess) return p;
return 0;
}
template<class T>
bool approximatelyEqual(T a, T b, T epsilon)
{
return fabs(a - b) <= ( (fabs(a) < fabs(b) ? fabs(b) : fabs(a)) * epsilon);
}
template<class T>
void compareResultsMV(T *A, T *B, int M){
int factor = 100;
T epsilon = std::numeric_limits<T>::epsilon() * factor;
std::cout<<"Epsilon : " << epsilon << std::endl;
for(int i = 0; i < M; i++){
if( !approximatelyEqual(A[i], B[i], epsilon) ){
printf("Outside tolerance at indices at i : %d, P : %f, Q : %f\n",
i, A[i], B[i]);
exit(0);
}
}
}
__device__
void printArrayDevice(float *A, int rows, int cols){
for(int i = 0; i < rows; i++){
for(int j = 0; j < cols; j++){
printf("%1.0f ", A[i*cols + j]);
}
printf("\n");
}
}
// CPU parallel reduction
float computeSum(float* hA, int N){
// parallel reduction
while(N > 1){
N /= 2;
for(int i = 0; i < N; i++){
hA[i] += hA[i + N];
}
}
float result = hA[0];
return result;
}
void cuBLASSUM(float *A, int N, float* result){
cublasHandle_t handle;
cublasCreate(&handle);
int incx = 1; // stride between consecutive elements
cublasSasum(handle, N, A, incx, result);
cublasDestroy(handle);
}