Skip to content

Commit 399d34d

Browse files
[Inference] remove wint float32 (PaddlePaddle#72545)
1 parent e22b7c3 commit 399d34d

File tree

2 files changed

+0
-86
lines changed

2 files changed

+0
-86
lines changed

paddle/phi/kernels/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/fpA_intB_gemm.h

-38
Original file line numberDiff line numberDiff line change
@@ -123,42 +123,4 @@ class CutlassFpAIntBGemmRunner {
123123
int multi_processor_count_;
124124
};
125125

126-
// This allocation is present to help with compiling with other structures in
127-
// FT. It will throw an error in all functions because this runner assumes the
128-
// weight type and the activation type are different. We allow empty classes to
129-
// be created, but any calls to gemm or gemm_bias_act will throw an error.
130-
template <typename WeightType>
131-
class CutlassFpAIntBGemmRunner<float, WeightType> {
132-
public:
133-
CutlassFpAIntBGemmRunner() = default;
134-
~CutlassFpAIntBGemmRunner() = default;
135-
136-
void gemm(const float* A,
137-
const WeightType* B,
138-
const float* weight_scales,
139-
float* C,
140-
int m,
141-
int n,
142-
int k,
143-
int group_size,
144-
char* workspace_ptr,
145-
const size_t workspace_bytes,
146-
cudaStream_t stream);
147-
148-
void gemm_bias_act(const float* A,
149-
const WeightType* B,
150-
const float* weight_scales,
151-
const float* biases,
152-
float* C,
153-
int m,
154-
int n,
155-
int k,
156-
int group_size,
157-
std::string activation_type,
158-
char* workspace_ptr,
159-
const size_t workspace_bytes,
160-
cudaStream_t stream);
161-
162-
int getWorkspaceSize(const int m, const int n, const int k);
163-
};
164126
} // namespace phi

paddle/phi/kernels/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/fpA_intB_gemm_template.cu

-48
Original file line numberDiff line numberDiff line change
@@ -823,58 +823,10 @@ int CutlassFpAIntBGemmRunner<T, WeightType>::getWorkspaceSize(const int m,
823823
return max_grid_m * max_grid_n * split_k_limit * 4;
824824
}
825825

826-
// =============================== Specialization T == WeightType
827-
// =======================================
828-
template <typename WeightType>
829-
void CutlassFpAIntBGemmRunner<float, WeightType>::gemm_bias_act(
830-
const float* A,
831-
const WeightType* B,
832-
const float* weight_scales,
833-
const float* biases,
834-
float* C,
835-
int m,
836-
int n,
837-
int k,
838-
int group_size,
839-
std::string activation_type,
840-
char* workspace_ptr,
841-
const size_t workspace_bytes,
842-
cudaStream_t stream) {
843-
throw std::runtime_error(
844-
("Attempting to run mixed gemm bias act when the types are the same is "
845-
"an error."));
846-
}
847-
848-
template <typename WeightType>
849-
void CutlassFpAIntBGemmRunner<float, WeightType>::gemm(
850-
const float* A,
851-
const WeightType* B,
852-
const float* weight_scales,
853-
float* C,
854-
int m,
855-
int n,
856-
int k,
857-
int group_size,
858-
char* workspace_ptr,
859-
const size_t workspace_bytes,
860-
cudaStream_t stream) {
861-
throw std::runtime_error((
862-
"Attempting to run mixed gemm when the types are the same is an error."));
863-
}
864-
865-
template <typename WeightType>
866-
int CutlassFpAIntBGemmRunner<float, WeightType>::getWorkspaceSize(const int m,
867-
const int n,
868-
const int k) {
869-
return 0;
870-
}
871-
872-
template class CutlassFpAIntBGemmRunner<float, uint8_t>;
873826
template class CutlassFpAIntBGemmRunner<half, uint8_t>;
874827
#ifdef PADDLE_CUDA_BF16
875828
template class CutlassFpAIntBGemmRunner<__nv_bfloat16, uint8_t>;
876829
#endif
877-
template class CutlassFpAIntBGemmRunner<float, cutlass::uint4b_t>;
878830
template class CutlassFpAIntBGemmRunner<half, cutlass::uint4b_t>;
879831
#ifdef PADDLE_CUDA_BF16
880832
template class CutlassFpAIntBGemmRunner<__nv_bfloat16, cutlass::uint4b_t>;

0 commit comments

Comments
 (0)