Skip to content

Commit 9bccbd8

Browse files
Author: Sanket Kale (committed)
Resolved MacOS and Web CI failures
1 parent 4f10c21 commit 9bccbd8

File tree

3 files changed

+16
-16
lines changed

3 files changed

+16
-16
lines changed

onnxruntime/core/mlas/lib/gelu_neon_fp16.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ Module Name:
1313
--*/
1414
#include "gelu.h"
1515
#include <cmath>
16-
#if defined(__ARM_NEON) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
16+
#if defined(MLAS_F16VEC_INTRINSICS_SUPPORTED)
1717

1818
void
1919
MLASCALL

onnxruntime/core/mlas/lib/mlasi.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1431,10 +1431,10 @@ struct MLAS_PLATFORM {
14311431
MLAS_COMPUTE_SUMEXP_FLOAT_KERNEL* ComputeSumExpF32Kernel;
14321432
MLAS_COMPUTE_LOGSOFTMAX_OUTPUT_FLOAT_KERNEL* ComputeLogSoftmaxOutputF32Kernel;
14331433
MLAS_COMPUTE_SOFTMAX_OUTPUT_FLOAT_KERNEL* ComputeSoftmaxOutputF32Kernel;
1434-
#if defined(MLAS_F16VEC_INTRINSICS_SUPPORTED)
1434+
#endif
1435+
#if defined(MLAS_F16VEC_INTRINSICS_SUPPORTED)
14351436
MLAS_COMPUTE_ERF_FP16_KERNEL* ErfF16KernelRoutine;
14361437
MLAS_COMPUTE_GELU_FP16_KERNEL* GeluF16KernelRoutine;
1437-
#endif
14381438
#endif
14391439
#if defined(MLAS_TARGET_AMD64)
14401440
MLAS_SGEMM_KERNEL_M1_ROUTINE* KernelM1Routine;

onnxruntime/core/providers/cpu/tensor/gelu.cc

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,9 @@ Status Gelu<T>::Compute(OpKernelContext* context) const {
7777
T* output_data = output->MutableData<T>();
7878

7979
concurrency::ThreadPool* tp = context->GetOperatorThreadPool();
80-
int64_t elem_count = input->Shape().Size();
81-
constexpr int64_t length_per_task = 4096; // this number comes from FastGelu.
82-
int64_t task_count = (elem_count + length_per_task - 1) / length_per_task;
80+
size_t elem_count = input->Shape().Size();
81+
constexpr size_t length_per_task = 4096; // this number comes from FastGelu.
82+
size_t task_count = (elem_count + length_per_task - 1) / length_per_task;
8383

8484
if (approximation_algorithm_ == "tanh") {
8585
// FastGelu allows optional bias. Here we split input data into chunks. Each chunk
@@ -95,16 +95,16 @@ Status Gelu<T>::Compute(OpKernelContext* context) const {
9595
const auto start = task_idx * length_per_task;
9696
const T* p_input = input_data + start;
9797
T* p_output = output_data + start;
98-
int64_t count = std::min(length_per_task, elem_count - start);
98+
size_t count = std::min(length_per_task, elem_count - start);
9999

100-
for (int64_t i = 0; i < count; i++) {
100+
for (size_t i = 0; i < count; i++) {
101101
T value = p_input[i];
102102
p_output[i] = value * (static_cast<T>(C) * value * value + static_cast<T>(B));
103103
}
104104

105105
MlasComputeTanh(p_output, p_output, narrow<size_t>(count));
106106

107-
for (int64_t i = 0; i < count; i++) {
107+
for (size_t i = 0; i < count; i++) {
108108
p_output[i] = 0.5f * p_input[i] * (p_output[i] + 1.0f);
109109
}
110110
},
@@ -117,16 +117,16 @@ Status Gelu<T>::Compute(OpKernelContext* context) const {
117117
const auto start = task_idx * length_per_task;
118118
const T* p_input = input_data + start;
119119
T* p_output = output_data + start;
120-
int64_t count = std::min(length_per_task, elem_count - start);
120+
size_t count = std::min(length_per_task, elem_count - start);
121121

122-
for (int64_t i = 0; i < count; i++) {
122+
for (size_t i = 0; i < count; i++) {
123123
T value = p_input[i];
124124
p_output[i] = value * static_cast<T>(M_SQRT1_2);
125125
}
126126

127127
MlasComputeErf(p_output, p_output, narrow<size_t>(count));
128128

129-
for (int64_t i = 0; i < count; i++) {
129+
for (size_t i = 0; i < count; i++) {
130130
p_output[i] = 0.5f * p_input[i] * (p_output[i] + 1.0f);
131131
}
132132
},
@@ -143,9 +143,9 @@ Status Gelu<MLFloat16>::Compute(OpKernelContext* context) const {
143143
Tensor* output = context->Output(0, input->Shape());
144144
MLFloat16* output_data = output->MutableData<MLFloat16>();
145145
concurrency::ThreadPool* tp = context->GetOperatorThreadPool();
146-
int64_t elem_count = input->Shape().Size();
147-
constexpr int64_t length_per_task = 4096;
148-
int64_t task_count = (elem_count + length_per_task - 1) / length_per_task;
146+
size_t elem_count = input->Shape().Size();
147+
constexpr size_t length_per_task = 4096;
148+
size_t task_count = (elem_count + length_per_task - 1) / length_per_task;
149149

150150
if (approximation_algorithm_ != "tanh" && approximation_algorithm_ != "none") {
151151
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Unsupported approximation_algorithm: ", approximation_algorithm_);
@@ -178,7 +178,7 @@ Status Gelu<MLFloat16>::Compute(OpKernelContext* context) const {
178178
const auto start = task_idx * length_per_task;
179179
const MLFloat16* p_input = input_data + start;
180180
MLFloat16* p_output = output_data + start;
181-
int64_t count = std::min(length_per_task, elem_count - start);
181+
size_t count = std::min(length_per_task, elem_count - start);
182182
MLFloat16* p_temp = temp_fp16_aligned.get() + start;
183183
MlasComputeFP16Gelu(p_input, p_output, p_temp, count, approximation_algorithm_);
184184
},

0 commit comments

Comments (0)