Skip to content

Commit cf5b44d

Browse files
committed
Added guard on double ETI
1 parent c39f424 commit cf5b44d

File tree

1 file changed

+103
-97
lines changed

1 file changed

+103
-97
lines changed

src/layers/misc/distconv/distconv_channelwise_softmax.cu

Lines changed: 103 additions & 97 deletions
Original file line number | Diff line number | Diff line change
@@ -25,118 +25,124 @@
25 25
////////////////////////////////////////////////////////////////////////////////
26 26

27 27
#define LBANN_LAYERS_MISC_CHANNELWISE_SOFTMAX_INSTANTIATE
28-
#include "lbann/utils/distconv.hpp"
28+
#include "../channelwise_softmax_kernels.cuh"
29 29
#include "lbann/base.hpp"
30 30
#include "lbann/layers/misc/distconv/distconv_channelwise_softmax.hpp"
31+
#include "lbann/utils/distconv.hpp"
31 32
#include "lbann/utils/gpu/helpers.hpp"
32-
#include "../channelwise_softmax_kernels.cuh"
33-
34 33

35 34
#ifdef LBANN_HAS_DISTCONV
36-
namespace distconv{
37-
template<typename Backend, typename DataType>
38-
template<typename Allocator>
39-
int
40-
ChannelwiseSoftmax<Backend, DataType>
41-
::forward(const tensor::Tensor<DataType, tensor::LocaleMPI, Allocator> &input_0,
42-
tensor::Tensor<DataType, tensor::LocaleMPI, Allocator> &output){
43-
44-
if (input_0.get_local_size() == 0 || output.get_local_size() == 0){
45-
util::MPIRootPrintStreamInfo() << "WARNING: EMPTY INPUT FOUND \n";
46-
return 1; // no op for empty inputs
47-
}
48-
49-
const auto& input_0_dims = input_0.get_local_shape();
50-
51-
const auto num_channels = input_0_dims[2];
52-
const auto local_mini_batch_size = input_0_dims[3];
53-
const auto mat_channel_size = input_0_dims[0] * input_0_dims[1];
54-
const auto mat_stride = num_channels * mat_channel_size;
55-
56-
// Convert to Hydrogen matrices for kernel launch
57-
58-
using LocalMat = El::Matrix<DataType, El::Device::GPU>;
59-
60-
LocalMat local_input(mat_stride,
35+
namespace distconv {
36+
template <typename Backend, typename DataType>
37+
template <typename Allocator>
38+
int ChannelwiseSoftmax<Backend, DataType>::forward(
39+
const tensor::Tensor<DataType, tensor::LocaleMPI, Allocator>& input_0,
40+
tensor::Tensor<DataType, tensor::LocaleMPI, Allocator>& output)
41+
{
42+
43+
if (input_0.get_local_size() == 0 || output.get_local_size() == 0) {
44+
util::MPIRootPrintStreamInfo() << "WARNING: EMPTY INPUT FOUND \n";
45+
return 1; // no op for empty inputs
46+
}
47+
48+
const auto& input_0_dims = input_0.get_local_shape();
49+
50+
const auto num_channels = input_0_dims[2];
51+
const auto local_mini_batch_size = input_0_dims[3];
52+
const auto mat_channel_size = input_0_dims[0] * input_0_dims[1];
53+
const auto mat_stride = num_channels * mat_channel_size;
54+
55+
// Convert to Hydrogen matrices for kernel launch
56+
57+
using LocalMat = El::Matrix<DataType, El::Device::GPU>;
58+
59+
LocalMat local_input(mat_stride,
60+
local_mini_batch_size,
61+
input_0.get_buffer(),
62+
mat_stride);
63+
64+
LocalMat local_output(mat_stride,
61 65
local_mini_batch_size,
62-
input_0.get_buffer(),
66+
output.get_buffer(),
63 67
mat_stride);
64 68

65-
LocalMat local_output(mat_stride,
66-
local_mini_batch_size,
67-
output.get_buffer(),
68-
mat_stride);
69-
70-
::lbann::channelwise_softmax_fp_impl(num_channels,
71-
mat_channel_size,
72-
local_input,
73-
local_output);
74-
return 1;
69+
::lbann::channelwise_softmax_fp_impl(num_channels,
70+
mat_channel_size,
71+
local_input,
72+
local_output);
73+
return 1;
74+
}
75+
76+
template <typename Backend, typename DataType>
77+
template <typename Allocator>
78+
int ChannelwiseSoftmax<Backend, DataType>::backward(
79+
const tensor::Tensor<DataType, tensor::LocaleMPI, Allocator>& output,
80+
const tensor::Tensor<DataType, tensor::LocaleMPI, Allocator>& output_grad,
81+
tensor::Tensor<DataType, tensor::LocaleMPI, Allocator>& input_grad_0)
82+
{
83+
if (output.get_local_size() == 0 || output_grad.get_local_size() == 0 ||
84+
input_grad_0.get_local_size() == 0) {
85+
util::MPIRootPrintStreamInfo() << "WARNING: EMPTY INPUT FOUND \n";
86+
return 1; // no op for empty inputs
75 87
}
76 88

77-
template<typename Backend, typename DataType>
78-
template<typename Allocator>
79-
int
80-
ChannelwiseSoftmax<Backend, DataType>
81-
::backward(const tensor::Tensor<DataType, tensor::LocaleMPI, Allocator> &output,
82-
const tensor::Tensor<DataType, tensor::LocaleMPI, Allocator> &output_grad,
83-
tensor::Tensor<DataType, tensor::LocaleMPI, Allocator> &input_grad_0){
84-
if (output.get_local_size() == 0 ||
85-
output_grad.get_local_size() == 0 ||
86-
input_grad_0.get_local_size() == 0){
87-
util::MPIRootPrintStreamInfo() << "WARNING: EMPTY INPUT FOUND \n";
88-
return 1; // no op for empty inputs
89-
}
90-
91-
const auto& input_0_dims = output.get_local_shape();
92-
const auto num_channels = input_0_dims[2];
93-
const auto local_mini_batch_size = input_0_dims[3];
94-
const auto mat_channel_size = input_0_dims[0] * input_0_dims[1];
95-
const auto mat_stride = num_channels * mat_channel_size;
96-
97-
// Convert to Hydrogen matrices for kernel launch
98-
99-
using LocalMat = El::Matrix<DataType, El::Device::GPU>;
100-
101-
LocalMat local_output(mat_stride,
102-
local_mini_batch_size,
103-
output.get_buffer(),
104-
mat_stride);
105-
106-
LocalMat local_output_grad(mat_stride,
107-
local_mini_batch_size,
108-
output_grad.get_buffer(),
109-
mat_stride);
110-
111-
LocalMat local_input_grad(mat_stride,
112-
local_mini_batch_size,
113-
input_grad_0.get_buffer(),
114-
mat_stride);
115-
116-
::lbann::channelwise_softmax_bp_impl(num_channels,
117-
mat_channel_size,
118-
local_output,
119-
local_output_grad,
120-
local_input_grad);
121-
return 1;
122-
}
89+
const auto& input_0_dims = output.get_local_shape();
90+
const auto num_channels = input_0_dims[2];
91+
const auto local_mini_batch_size = input_0_dims[3];
92+
const auto mat_channel_size = input_0_dims[0] * input_0_dims[1];
93+
const auto mat_stride = num_channels * mat_channel_size;
94+
95+
// Convert to Hydrogen matrices for kernel launch
96+
97+
using LocalMat = El::Matrix<DataType, El::Device::GPU>;
98+
99+
LocalMat local_output(mat_stride,
100+
local_mini_batch_size,
101+
output.get_buffer(),
102+
mat_stride);
103+
104+
LocalMat local_output_grad(mat_stride,
105+
local_mini_batch_size,
106+
output_grad.get_buffer(),
107+
mat_stride);
108+
109+
LocalMat local_input_grad(mat_stride,
110+
local_mini_batch_size,
111+
input_grad_0.get_buffer(),
112+
mat_stride);
113+
114+
::lbann::channelwise_softmax_bp_impl(num_channels,
115+
mat_channel_size,
116+
local_output,
117+
local_output_grad,
118+
local_input_grad);
119+
return 1;
120+
}
123 121

124 122
// =========================================================
125 123
// Explicit template instantiation
126 124
// =========================================================
127 125

128-
#define ETI(T, Backend) \
129-
template class ChannelwiseSoftmax<Backend, T>; \
130-
template int ChannelwiseSoftmax<Backend, T>::forward<tensor::CUDAAllocator>( \
131-
const tensor::Tensor<T, tensor::LocaleMPI, tensor::CUDAAllocator> &input_0, \
132-
tensor::Tensor<T, tensor::LocaleMPI, tensor::CUDAAllocator> &output_0); \
133-
template int ChannelwiseSoftmax<Backend, T>::backward<tensor::CUDAAllocator>( \
134-
const tensor::Tensor<T, tensor::LocaleMPI, tensor::CUDAAllocator> &input_0, \
135-
const tensor::Tensor<T, tensor::LocaleMPI, tensor::CUDAAllocator> &input_1, \
136-
tensor::Tensor<T, tensor::LocaleMPI, tensor::CUDAAllocator> &output_grad);
137-
126+
#define ETI(T, Backend) \
127+
template class ChannelwiseSoftmax<Backend, T>; \
128+
template int ChannelwiseSoftmax<Backend, T>::forward<tensor::CUDAAllocator>( \
129+
const tensor::Tensor<T, tensor::LocaleMPI, tensor::CUDAAllocator>& \
130+
input_0, \
131+
tensor::Tensor<T, tensor::LocaleMPI, tensor::CUDAAllocator>& output_0); \
132+
template int \
133+
ChannelwiseSoftmax<Backend, T>::backward<tensor::CUDAAllocator>( \
134+
const tensor::Tensor<T, tensor::LocaleMPI, tensor::CUDAAllocator>& \
135+
input_0, \
136+
const tensor::Tensor<T, tensor::LocaleMPI, tensor::CUDAAllocator>& \
137+
input_1, \
138+
tensor::Tensor<T, tensor::LocaleMPI, tensor::CUDAAllocator>& output_grad);
139+
140+
/// @todo: fp16
138 141
ETI(float, BackendDNNLib)
142+
#ifdef LBANN_HAS_DOUBLE
139 143
ETI(double, BackendDNNLib)
144+
#endif // LBANN_HAS_DOUBLE
145+
140 146
#undef ETI
141-
} // namespace distconv
142-
#endif // LBANN_HAS_DISTCONV
147+
} // namespace distconv
148+
#endif // LBANN_HAS_DISTCONV

0 commit comments

Comments
 (0)