Skip to content

Commit 145c3a7

Browse files
cmikeh2, tjruwase, mrwyattii
authored
Fix missing scale attributes for GPTJ (#3256)
Co-authored-by: Olatunji Ruwase <[email protected]>
Co-authored-by: Michael Wyatt <[email protected]>
1 parent ad168a6 commit 145c3a7

File tree

2 files changed

+5
-3
lines changed

2 files changed

+5
-3
lines changed

csrc/transformer/inference/csrc/pt_binding.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -462,9 +462,9 @@ std::vector<at::Tensor> ds_softmax_context(at::Tensor& query_key_value,
462462

463463
T* workspace = (T*)InferenceContext::Instance().GetWorkSpace();
464464
size_t buf_size = bsz * seq_len * hidden_dim;
465-
auto output = torch::from_blob(workspace + 3 * buf_size, {bsz, seq_len, hidden_dim}, options);
465+
auto output = torch::from_blob(workspace + 4 * buf_size, {bsz, seq_len, hidden_dim}, options);
466466

467-
auto query_cont = workspace + 4 * buf_size;
467+
auto query_cont = workspace + 5 * buf_size;
468468
size_t offset =
469469
10 * (hidden_dim * bsz * InferenceContext::Instance().GetMaxTokenLenght()) +
470470
layer_id * 2 * bsz * InferenceContext::Instance().GetMaxTokenLenght() * hidden_dim;

deepspeed/ops/transformer/inference/op_binding/gelu_gemm.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@ def forward(self,
2323
bias: torch.Tensor,
2424
weight_out: torch.Tensor,
2525
async_op: bool = False):
26-
output = self.fused_gemm_gelu(input, weight, weight.scale, bias, weight_out, weight_out.scale,
26+
output = self.fused_gemm_gelu(input, weight, weight.scale if hasattr(weight, "scale") else torch.empty(1),
27+
bias, weight_out,
28+
weight_out.scale if hasattr(weight_out, "scale") else torch.empty(1),
2729
self.config.epsilon, self.config.pre_layer_norm, self.config.q_int8, async_op,
2830
self.config.transposed_mode)
2931
return output

0 commit comments

Comments (0)