Skip to content

Commit 1ffe793

Browse files
authored
Fix typo: change Upample to Upsample. (#23838)
### Description

Fixed a typo in function names related to the Upsample CUDA kernel. Changed the incorrect spelling `Upample` to `Upsample` across the relevant functions.

### Motivation and Context

This change is necessary to maintain consistency and prevent potential confusion caused by incorrect function names.
1 parent 1088a1e commit 1ffe793

File tree

3 files changed

+67
-67
lines changed

3 files changed

+67
-67
lines changed

onnxruntime/core/providers/cuda/tensor/upsample.cc

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -290,16 +290,16 @@ Status Upsample<T>::BaseCompute(OpKernelContext* context,
290290
scales_div[i] = fast_divmod(gsl::narrow_cast<int>(ceil(scales[i])));
291291
}
292292

293-
UpampleImpl(Stream(context),
294-
mode_,
295-
rank,
296-
(UpsampleMode::LINEAR == mode_) ? (rank == 2 ? X_dims[0] : X_dims[2]) : 0,
297-
input_strides,
298-
output_div_pitches,
299-
scales_div,
300-
reinterpret_cast<const CudaT*>(X->Data<T>()),
301-
reinterpret_cast<CudaT*>(Y->MutableData<T>()),
302-
output_count);
293+
UpsampleImpl(Stream(context),
294+
mode_,
295+
rank,
296+
(UpsampleMode::LINEAR == mode_) ? (rank == 2 ? X_dims[0] : X_dims[2]) : 0,
297+
input_strides,
298+
output_div_pitches,
299+
scales_div,
300+
reinterpret_cast<const CudaT*>(X->Data<T>()),
301+
reinterpret_cast<CudaT*>(Y->MutableData<T>()),
302+
output_count);
303303
}
304304

305305
return Status::OK();

onnxruntime/core/providers/cuda/tensor/upsample_impl.cu

Lines changed: 47 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@ namespace onnxruntime {
88
namespace cuda {
99

1010
template <typename T, int RANK>
11-
__global__ void _UpampleNearestKernel(const TArray<int64_t> input_pitches,
12-
const TArray<fast_divmod> output_div_pitches,
13-
const TArray<fast_divmod> scales_div,
14-
const T* __restrict__ input_data,
15-
T* __restrict__ output_data,
16-
const size_t N) {
11+
__global__ void _UpsampleNearestKernel(const TArray<int64_t> input_pitches,
12+
const TArray<fast_divmod> output_div_pitches,
13+
const TArray<fast_divmod> scales_div,
14+
const T* __restrict__ input_data,
15+
T* __restrict__ output_data,
16+
const size_t N) {
1717
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id, N);
1818
CUDA_LONG input_index = 0;
1919
CUDA_LONG output_index = id;
@@ -36,13 +36,13 @@ __global__ void _UpampleNearestKernel(const TArray<int64_t> input_pitches,
3636
// This is the common use-case where the 4-D input (batched multi-channel images)
3737
// is usually of shape [N, C, H, W] and the scales are [1.0, 1.0, height_scale, width_scale]
3838
template <typename T>
39-
__global__ void _UpampleBilinear4DInputKernel(const int64_t input_dim2,
40-
const TArray<int64_t> input_pitches,
41-
const TArray<fast_divmod> output_div_pitches,
42-
const TArray<fast_divmod> scales_div,
43-
const T* __restrict__ input_data,
44-
T* __restrict__ output_data,
45-
const size_t N) {
39+
__global__ void _UpsampleBilinear4DInputKernel(const int64_t input_dim2,
40+
const TArray<int64_t> input_pitches,
41+
const TArray<fast_divmod> output_div_pitches,
42+
const TArray<fast_divmod> scales_div,
43+
const T* __restrict__ input_data,
44+
T* __restrict__ output_data,
45+
const size_t N) {
4646
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id, N);
4747
CUDA_LONG input_index = 0;
4848

@@ -95,13 +95,13 @@ __global__ void _UpampleBilinear4DInputKernel(const int64_t input_dim2,
9595

9696
// The following method supports a 2-D input in 'Linear mode'
9797
template <typename T>
98-
__global__ void _UpampleBilinear2DInputKernel(const int64_t input_dim0,
99-
const TArray<int64_t> input_pitches,
100-
const TArray<fast_divmod> output_div_pitches,
101-
const TArray<fast_divmod> scales_div,
102-
const T* __restrict__ input_data,
103-
T* __restrict__ output_data,
104-
const size_t N) {
98+
__global__ void _UpsampleBilinear2DInputKernel(const int64_t input_dim0,
99+
const TArray<int64_t> input_pitches,
100+
const TArray<fast_divmod> output_div_pitches,
101+
const TArray<fast_divmod> scales_div,
102+
const T* __restrict__ input_data,
103+
T* __restrict__ output_data,
104+
const size_t N) {
105105
CALCULATE_ELEMENTWISE_INDEX_OR_EXIT(id, N);
106106
CUDA_LONG input_index = 0;
107107

@@ -147,44 +147,44 @@ __global__ void _UpampleBilinear2DInputKernel(const int64_t input_dim0,
147147
}
148148

149149
template <typename T>
150-
void UpampleImpl(cudaStream_t stream,
151-
const onnxruntime::UpsampleMode upsample_mode,
152-
const size_t rank,
153-
const int64_t input_dim2,
154-
const TArray<int64_t>& input_pitches,
155-
const TArray<fast_divmod>& output_div_pitches,
156-
const TArray<fast_divmod>& scales_div,
157-
const T* input_data,
158-
T* output_data,
159-
const size_t N) {
150+
void UpsampleImpl(cudaStream_t stream,
151+
const onnxruntime::UpsampleMode upsample_mode,
152+
const size_t rank,
153+
const int64_t input_dim2,
154+
const TArray<int64_t>& input_pitches,
155+
const TArray<fast_divmod>& output_div_pitches,
156+
const TArray<fast_divmod>& scales_div,
157+
const T* input_data,
158+
T* output_data,
159+
const size_t N) {
160160
int blocksPerGrid = (int)(ceil(static_cast<float>(N) / GridDim::maxThreadsPerBlock));
161161
if (onnxruntime::UpsampleMode::NN == upsample_mode) {
162162
if (rank == 4) {
163-
_UpampleNearestKernel<T, 4><<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0, stream>>>(
163+
_UpsampleNearestKernel<T, 4><<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0, stream>>>(
164164
input_pitches, output_div_pitches, scales_div,
165165
input_data, output_data, N);
166166
} else if (rank == 3) {
167-
_UpampleNearestKernel<T, 3><<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0, stream>>>(
167+
_UpsampleNearestKernel<T, 3><<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0, stream>>>(
168168
input_pitches, output_div_pitches, scales_div,
169169
input_data, output_data, N);
170170
} else if (rank == 2) {
171-
_UpampleNearestKernel<T, 2><<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0, stream>>>(
171+
_UpsampleNearestKernel<T, 2><<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0, stream>>>(
172172
input_pitches, output_div_pitches, scales_div,
173173
input_data, output_data, N);
174174
} else if (rank == 1) {
175-
_UpampleNearestKernel<T, 1><<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0, stream>>>(
175+
_UpsampleNearestKernel<T, 1><<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0, stream>>>(
176176
input_pitches, output_div_pitches, scales_div,
177177
input_data, output_data, N);
178178
} else {
179179
ORT_THROW("Unsupported rank by the Upsample CUDA kernel. Input rank: ", rank);
180180
}
181181
} else if (onnxruntime::UpsampleMode::LINEAR == upsample_mode) {
182182
if (rank == 4) {
183-
_UpampleBilinear4DInputKernel<T><<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0, stream>>>(
183+
_UpsampleBilinear4DInputKernel<T><<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0, stream>>>(
184184
input_dim2, input_pitches, output_div_pitches, scales_div,
185185
input_data, output_data, N);
186186
} else if (rank == 2) {
187-
_UpampleBilinear2DInputKernel<T><<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0, stream>>>(
187+
_UpsampleBilinear2DInputKernel<T><<<blocksPerGrid, GridDim::maxThreadsPerBlock, 0, stream>>>(
188188
input_dim2, input_pitches, output_div_pitches, scales_div,
189189
input_data, output_data, N);
190190
} else {
@@ -197,17 +197,17 @@ void UpampleImpl(cudaStream_t stream,
197197
}
198198
}
199199

200-
#define SPECIALIZED_IMPL(T) \
201-
template void UpampleImpl<T>(cudaStream_t stream, \
202-
const onnxruntime::UpsampleMode upsample_mode, \
203-
const size_t rank, \
204-
const int64_t input_dim2, \
205-
const TArray<int64_t>& input_pitches, \
206-
const TArray<fast_divmod>& output_div_pitches, \
207-
const TArray<fast_divmod>& scales_div, \
208-
const T* input_data, \
209-
T* output_data, \
210-
const size_t N);
200+
#define SPECIALIZED_IMPL(T) \
201+
template void UpsampleImpl<T>(cudaStream_t stream, \
202+
const onnxruntime::UpsampleMode upsample_mode, \
203+
const size_t rank, \
204+
const int64_t input_dim2, \
205+
const TArray<int64_t>& input_pitches, \
206+
const TArray<fast_divmod>& output_div_pitches, \
207+
const TArray<fast_divmod>& scales_div, \
208+
const T* input_data, \
209+
T* output_data, \
210+
const size_t N);
211211

212212
SPECIALIZED_IMPL(float)
213213
SPECIALIZED_IMPL(double)

onnxruntime/core/providers/cuda/tensor/upsample_impl.h

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,16 @@ namespace onnxruntime {
1111
namespace cuda {
1212

1313
template <typename T>
14-
void UpampleImpl(cudaStream_t stream,
15-
const onnxruntime::UpsampleMode upsample_mode,
16-
const size_t rank,
17-
const int64_t input_dim2,
18-
const TArray<int64_t>& input_pitches,
19-
const TArray<fast_divmod>& output_div_pitches,
20-
const TArray<fast_divmod>& scales_div,
21-
const T* input_data,
22-
T* output_data,
23-
const size_t N);
14+
void UpsampleImpl(cudaStream_t stream,
15+
const onnxruntime::UpsampleMode upsample_mode,
16+
const size_t rank,
17+
const int64_t input_dim2,
18+
const TArray<int64_t>& input_pitches,
19+
const TArray<fast_divmod>& output_div_pitches,
20+
const TArray<fast_divmod>& scales_div,
21+
const T* input_data,
22+
T* output_data,
23+
const size_t N);
2424

2525
} // namespace cuda
2626
} // namespace onnxruntime

0 commit comments

Comments (0)