-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcommon.hip
127 lines (111 loc) · 3.37 KB
/
common.hip
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
// Copyright 2025 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#ifndef HIP_MATMUL_COMMON
#define HIP_MATMUL_COMMON
#include <hip/hip_runtime.h>
#include <cstdio>
#include <random>
#include <vector>
inline void hip_check_impl(hipError_t hip_error_code, const char *condstr,
const char *file, int line) {
if (hip_error_code != hipSuccess) {
fprintf(stderr, "HIP Error \"%s\" produced by `%s` at %s:%d\n",
hipGetErrorString(hip_error_code), condstr, file, line);
exit(EXIT_FAILURE);
}
}
#define HIP_CHECK(expr) hip_check_impl(expr, #expr, __FILE__, __LINE__)
inline int getIntEnvVar(const char *name, int default_val) {
const char *env = std::getenv(name);
return env ? std::stoi(env) : default_val;
};
struct MNKShape {
int M, N, K;
};
enum class Type { SI8, SI16, SI32, FP16, FP32 };
inline const char *str(Type t) {
switch (t) {
case Type::SI8:
return "si8";
case Type::SI16:
return "si16";
case Type::SI32:
return "si32";
case Type::FP16:
return "fp16";
case Type::FP32:
return "fp32";
}
}
inline __device__ __host__ int type_size(Type t) {
switch (t) {
case Type::SI8:
return 1;
case Type::SI16:
return 2;
case Type::SI32:
return 4;
case Type::FP16:
return 2;
case Type::FP32:
return 4;
}
}
template <Type t> struct CTypeImpl {};
template <> struct CTypeImpl<Type::SI8> {
using type = int8_t;
};
template <> struct CTypeImpl<Type::SI16> {
using type = int16_t;
};
template <> struct CTypeImpl<Type::SI32> {
using type = int32_t;
};
template <> struct CTypeImpl<Type::FP16> {
using type = _Float16;
};
template <> struct CTypeImpl<Type::FP32> {
using type = float;
};
template <Type t> using CType = typename CTypeImpl<t>::type;
template <typename A, typename B>
__device__ __host__ std::common_type_t<A, B> ceil_div(A a, B b) {
return (a + b - 1) / b;
}
template <int Po2, typename A> __device__ __host__ A round_up_to_po2(A a) {
static_assert(Po2 > 0 && (Po2 & (Po2 - 1)) == 0);
return ((a - 1) | (Po2 - 1)) + 1;
}
template <Type type>
void fillRandomBuffer(int size, std::minstd_rand &r, void *out_buffer) {
using T = CType<type>;
T *out_buffer_typed = static_cast<T *>(out_buffer);
for (int i = 0; i < size; ++i) {
// Generate small integers in [-2, +2] so products are in [-4, +4] so
// accumulators are in [-4K, +4K] for accumulation depth K so they're
// exactly representable, float rounding is exact and we don't need
// fuzzy compares.
out_buffer_typed[i] = static_cast<T>(static_cast<int>((r() % 5)) - 2);
}
}
inline std::vector<std::byte> makeRandomBuffer(Type type, int size,
std::minstd_rand &r) {
int bytes = size * type_size(type);
std::vector<std::byte> result(bytes);
if (type == Type::SI8) {
fillRandomBuffer<Type::SI8>(size, r, result.data());
} else if (type == Type::SI16) {
fillRandomBuffer<Type::SI16>(size, r, result.data());
} else if (type == Type::SI32) {
fillRandomBuffer<Type::SI32>(size, r, result.data());
} else if (type == Type::FP16) {
fillRandomBuffer<Type::FP16>(size, r, result.data());
} else if (type == Type::FP32) {
fillRandomBuffer<Type::FP32>(size, r, result.data());
}
return result;
}
#endif