Skip to content

Commit 2d58a58

Browse files
committed
Updated implementation to incorporate updated channelwise softmax API
- Currently building and linking
1 parent 83095e6 commit 2d58a58

File tree

3 files changed

+122
-116
lines changed

3 files changed

+122
-116
lines changed

include/lbann/layers/misc/channelwise_softmax.hpp

Lines changed: 98 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -37,29 +37,37 @@
3737
#include "lbann/layers/misc/distconv/distconv_channelwise_softmax.hpp"
3838
#endif
3939

40-
4140
namespace lbann {
4241

4342
#ifdef LBANN_HAS_DISTCONV
43+
namespace dc {
44+
template <typename TensorDataType>
45+
using ChannelwiseSoftmax =
46+
::distconv::ChannelwiseSoftmax<Backend, TensorDataType>;
47+
} // namespace dc
48+
4449
template <typename TensorDataType, data_layout Layout, El::Device Device>
4550
class channelwise_softmax_distconv_adapter
46-
: public data_type_distconv_adapter<TensorDataType>{
47-
public:
48-
using TensorDevType = typename data_type_distconv_adapter<TensorDataType>::TensorDevType;
49-
50-
channelwise_softmax_distconv_adapter(Layer& layer)
51-
: data_type_distconv_adapter<TensorDataType>(layer){}
52-
53-
virtual ~channelwise_softmax_distconv_adapter() = default;
54-
void setup_distributions(tensor_overlap_constraints &constraints) override;
55-
void setup_layer(size_t workspace_capacity) override;
56-
void fp_compute();
57-
void bp_compute();
58-
std::unique_ptr<dc::ChannelwiseSoftmax<TensorDataType>> m_channelwise_softmax_operator;
59-
}; // class definition channelwise_softmax_distconv_adapter
60-
61-
#endif // LBANN_HAS_DISTCONV
51+
: public data_type_distconv_adapter<TensorDataType>
52+
{
53+
public:
54+
using TensorDevType =
55+
typename data_type_distconv_adapter<TensorDataType>::TensorDevType;
56+
57+
channelwise_softmax_distconv_adapter(Layer& layer)
58+
: data_type_distconv_adapter<TensorDataType>(layer)
59+
{}
60+
61+
virtual ~channelwise_softmax_distconv_adapter() = default;
62+
void setup_distributions(tensor_overlap_constraints& constraints) override;
63+
void setup_layer(size_t workspace_capacity) override;
64+
void fp_compute();
65+
void bp_compute();
66+
std::unique_ptr<dc::ChannelwiseSoftmax<TensorDataType>>
67+
m_channelwise_softmax_operator;
68+
}; // class definition channelwise_softmax_distconv_adapter
6269

70+
#endif // LBANN_HAS_DISTCONV
6371

6472
/** @brief Apply softmax to tensor channels.
6573
*
@@ -121,14 +129,29 @@ class channelwise_softmax_layer : public data_type_layer<TensorDataType>
121129
void bp_compute() override;
122130

123131
#ifdef LBANN_HAS_DISTCONV
124-
friend class channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>;
125-
protected:
126-
void setup_distconv_adapter(const DataReaderMetaData& dr_metadata) override;
127-
bool is_distconv_supported() const override;
128-
channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>& get_distconv_adapter() override;
129-
const channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>& get_distconv_adapter() const override;
132+
friend class channelwise_softmax_distconv_adapter<TensorDataType,
133+
Layout,
134+
Device>;
135+
136+
protected:
137+
void setup_distconv_adapter() override;
138+
bool is_distconv_supported() const override;
139+
channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>&
140+
get_distconv_adapter() override;
141+
const channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>&
142+
get_distconv_adapter() const override;
130143
#endif // LBANN_HAS_DISTCONV
144+
private:
145+
void get_channel_size_and_stride(El::Int& channel_size,
146+
El::Int& channel_stride,
147+
El::Int& num_channels) const;
148+
149+
/** Specifies the dimension of the tensor to perform softmax on. */
150+
int64_t m_dim;
131151

152+
/** @brief If true, only performs softmax on the chosen dimension. Otherwise
153+
all dimensions but ``m_dim`` will be used. */
154+
bool m_single_dim_mode;
132155
};
133156

134157
// Builder function
@@ -184,127 +207,114 @@ El::Device channelwise_softmax_layer<TensorDataType, Layout, Device>::
184207
return Device;
185208
}
186209

187-
template <typename TensorDataType, data_layout Layout, El::Device Device>
188-
void channelwise_softmax_layer<TensorDataType,Layout,Device>::setup_dims(DataReaderMetaData& dr_metadata) {
189-
data_type_layer<TensorDataType>::setup_dims(dr_metadata);
190-
this->set_output_dims(this->get_input_dims());
191-
192-
#ifdef LBANN_HAS_DISTCONV
193-
194-
if (this->distconv_enabled()){
195-
// Additional checks when distconv mode is enabled
196-
const auto& input_dims = this->get_input_dims();
197-
const auto& output_dims = this->get_output_dims();
198-
199-
if (input_dims.size() != 3 || output_dims.size() != 3){
200-
LBANN_ERROR(this->get_type()," layer \"",this->get_name(),"\" ",
201-
"expects an input and output tensor with 3 dimensions (channel, *, *), "
202-
"but it has been configured as a ",
203-
input_dims.size(), "-D input tensor and ",
204-
output_dims.size(),"-D output tensor");
205-
}
206-
}
207-
#endif // LBANN_HAS_DISTCONV
208-
}
209-
210210
#ifdef LBANN_HAS_DISTCONV
211211

212212
// =========================================================
213213
// DistConv-Adapter member functions
214214
// =========================================================
215215
template <typename TensorDataType, data_layout Layout, El::Device Device>
216-
void
217-
channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>
218-
::setup_distributions(tensor_overlap_constraints &constraints){
216+
void channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>::
217+
setup_distributions(tensor_overlap_constraints& constraints)
218+
{
219219
data_type_distconv_adapter<TensorDataType>::setup_distributions(constraints);
220220

221-
for (auto &d: this->m_prev_activations_dists) {
221+
for (auto& d : this->m_prev_activations_dists) {
222222
d.clear_overlap();
223223
constraints.mark_updated(d);
224224
constraints.mark_invariant(d);
225225
}
226-
for (auto &d: this->m_activations_dists) {
226+
for (auto& d : this->m_activations_dists) {
227227
d.clear_overlap();
228228
constraints.mark_updated(d);
229229
constraints.mark_invariant(d);
230230
}
231-
for (auto &d: this->m_prev_error_signals_dists) {
231+
for (auto& d : this->m_prev_error_signals_dists) {
232232
d.clear_overlap();
233233
constraints.mark_updated(d);
234234
constraints.mark_invariant(d);
235235
}
236-
for (auto &d: this->m_error_signals_dists) {
236+
for (auto& d : this->m_error_signals_dists) {
237237
d.clear_overlap();
238238
constraints.mark_updated(d);
239239
constraints.mark_invariant(d);
240240
}
241241
}
242242

243243
template <typename TensorDataType, data_layout Layout, El::Device Device>
244-
void
245-
channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>
246-
::setup_layer(size_t workspace_capacity){
244+
void channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>::
245+
setup_layer(size_t workspace_capacity)
246+
{
247247
data_type_distconv_adapter<TensorDataType>::setup_layer(workspace_capacity);
248248

249-
m_channelwise_softmax_operator = std::make_unique<dc::ChannelwiseSoftmax<TensorDataType>>(dc::get_backend());
249+
m_channelwise_softmax_operator =
250+
std::make_unique<dc::ChannelwiseSoftmax<TensorDataType>>(dc::get_backend());
250251
}
251252

252253
template <typename TensorDataType, data_layout Layout, El::Device Device>
253-
void
254-
channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>
255-
::fp_compute(){
256-
auto &layer = dynamic_cast<
257-
channelwise_softmax_layer<TensorDataType, Layout, Device>&>(this->layer());
254+
void channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>::
255+
fp_compute()
256+
{
257+
auto& layer =
258+
dynamic_cast<channelwise_softmax_layer<TensorDataType, Layout, Device>&>(
259+
this->layer());
258260
m_channelwise_softmax_operator->forward(this->get_prev_activations(0),
259261
this->get_activations(0));
260262
}
261263

262264
template <typename TensorDataType, data_layout Layout, El::Device Device>
263-
void
264-
channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>
265-
::bp_compute(){
266-
auto &layer = dynamic_cast<
267-
channelwise_softmax_layer<TensorDataType, Layout, Device>&>(this->layer());
268-
m_channelwise_softmax_operator->backward(this->get_activations(0),
269-
this->get_prev_error_signals(),
270-
this->get_error_signals(0));
265+
void channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>::
266+
bp_compute()
267+
{
268+
auto& layer =
269+
dynamic_cast<channelwise_softmax_layer<TensorDataType, Layout, Device>&>(
270+
this->layer());
271+
m_channelwise_softmax_operator->backward(this->get_activations(0),
272+
this->get_prev_error_signals(),
273+
this->get_error_signals(0));
271274
}
272275
// =============================================================
273276
// DistConv-enabled Channelwise-Softmax member functions
274277
// =============================================================
275278

276279
template <typename TensorDataType, data_layout Layout, El::Device Device>
277-
bool
278-
channelwise_softmax_layer<TensorDataType, Layout, Device>
279-
::is_distconv_supported() const {
280-
return Device==El::Device::GPU && Layout == data_layout::DATA_PARALLEL;
280+
bool channelwise_softmax_layer<TensorDataType, Layout, Device>::
281+
is_distconv_supported() const
282+
{
283+
return Device == El::Device::GPU && Layout == data_layout::DATA_PARALLEL;
281284
}
282285

283286
template <typename TensorDataType, data_layout Layout, El::Device Device>
284-
void
285-
channelwise_softmax_layer<TensorDataType, Layout, Device>
286-
::setup_distconv_adapter(const DataReaderMetaData& dr_metadata){
287-
this->get_distconv_adapter_ptr() = std::make_unique<channelwise_softmax_distconv_adapter<
288-
TensorDataType, Layout, Device>>(*this);
287+
void channelwise_softmax_layer<TensorDataType, Layout, Device>::
288+
setup_distconv_adapter()
289+
{
290+
this->get_distconv_adapter_ptr() = std::make_unique<
291+
channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>>(
292+
*this);
289293
}
290294

291295
template <typename TensorDataType, data_layout Layout, El::Device Device>
292296
const channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>&
293-
channelwise_softmax_layer<TensorDataType, Layout, Device>
294-
::get_distconv_adapter() const{
295-
return dynamic_cast<const channelwise_softmax_distconv_adapter<
296-
TensorDataType, Layout, Device>&>(data_type_layer<TensorDataType>::get_distconv_adapter());
297+
channelwise_softmax_layer<TensorDataType, Layout, Device>::
298+
get_distconv_adapter() const
299+
{
300+
return dynamic_cast<const channelwise_softmax_distconv_adapter<TensorDataType,
301+
Layout,
302+
Device>&>(
303+
data_type_layer<TensorDataType>::get_distconv_adapter());
297304
}
298305

299306
template <typename TensorDataType, data_layout Layout, El::Device Device>
300307
channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>&
301-
channelwise_softmax_layer<TensorDataType, Layout, Device>
302-
::get_distconv_adapter(){
303-
return const_cast<channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>&>(
304-
static_cast<const channelwise_softmax_layer<TensorDataType, Layout, Device>&>(*this).get_distconv_adapter());
308+
channelwise_softmax_layer<TensorDataType, Layout, Device>::
309+
get_distconv_adapter()
310+
{
311+
return const_cast<
312+
channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>&>(
313+
static_cast<
314+
const channelwise_softmax_layer<TensorDataType, Layout, Device>&>(*this)
315+
.get_distconv_adapter());
305316
}
306317

307-
308318
#endif // LBANN_HAS_DISTCONV
309319

310320
#ifndef LBANN_CHANNELWISE_SOFTMAX_LAYER_INSTANTIATE

include/lbann/layers/misc/channelwise_softmax_impl.hpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,28 @@ void channelwise_softmax_layer<TensorDataType, Layout, Device>::setup_dims()
5353
}
5454

5555
this->set_output_dims(this->get_input_dims());
56+
#ifdef LBANN_HAS_DISTCONV
57+
58+
if (this->distconv_enabled()) {
59+
// Additional checks when distconv mode is enabled
60+
const auto& input_dims = this->get_input_dims();
61+
const auto& output_dims = this->get_output_dims();
62+
63+
if (input_dims.size() != 3 || output_dims.size() != 3) {
64+
LBANN_ERROR(
65+
this->get_type(),
66+
" layer \"",
67+
this->get_name(),
68+
"\" ",
69+
"expects an input and output tensor with 3 dimensions (channel, *, *), "
70+
"but it has been configured as a ",
71+
input_dims.size(),
72+
"-D input tensor and ",
73+
output_dims.size(),
74+
"-D output tensor");
75+
}
76+
}
77+
#endif // LBANN_HAS_DISTCONV
5678
}
5779

5880
template <typename TensorDataType, data_layout Layout, El::Device Device>

include/lbann/utils/distconv.hpp

Lines changed: 2 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -53,17 +53,6 @@
5353
#include "p2p/p2p.hpp"
5454
#endif // DISTCONV_HAS_P2P
5555

56-
#include "lbann/layers/learning/distconv/distconv_layers.hpp"
57-
#include "lbann/layers/math/distconv/distconv_matmul.hpp"
58-
59-
#ifdef LBANN_HAS_NVSHMEM
60-
#include "lbann/layers/transform/distconv/distconv_scatter.hpp"
61-
#include "lbann/layers/transform/distconv/distconv_gather.hpp"
62-
#include "lbann/layers/transform/distconv/distconv_nvshmem_vector_addressing.hpp"
63-
#endif // LBANN_HAS_NVSHMEM
64-
65-
#include "lbann/layers/misc/distconv/distconv_channelwise_softmax.hpp"
66-
6756
namespace lbann {
6857

6958
inline auto default_hydrogen_stream()
@@ -137,23 +126,8 @@ using MPIRootPrintStreamWaning = ::distconv::util::MPIRootPrintStreamWarning;
137126

138127
// Distconv layer classes
139128
using Backend = ::distconv::BackendDNNLib;
140-
using ReLU = ::distconv::ReLU<Backend>;
141-
using LeakyReLU = ::distconv::LeakyReLU<Backend>;
142-
template <typename TensorDataType>
143-
using Convolution = ::distconv::Convolution<Backend, TensorDataType>;
144-
template <typename TensorDataType>
145-
using ChannelwiseFullyConnected = ::distconv::ChannelwiseFullyConnected<Backend, TensorDataType>;
146-
template <typename TensorDataType>
147-
using Pooling = ::distconv::Pooling<Backend, TensorDataType>;
148-
template <typename TensorDataType>
149-
using BatchNormalization = ::distconv::BatchNormalization<Backend, TensorDataType>;
150-
template <typename TensorDataType>
151-
using MatMul = ::distconv::MatMul<Backend, TensorDataType>;
152-
template <typename TensorDataType>
153-
using ChannelwiseSoftmax = ::distconv::ChannelwiseSoftmax<Backend, TensorDataType>;
154-
using Softmax = ::distconv::Softmax<Backend>;
155-
using CrossEntropy = ::distconv::CrossEntropy<Backend>;
156-
using MeanSquaredError = ::distconv::MeanSquaredError<Backend>;
129+
using AlCommType = typename decltype(std::declval<Backend>()
130+
.get_al_mpi_cuda_comm())::element_type;
157131

158132
using ::distconv::get_channel_dim;
159133
using ::distconv::get_sample_dim;

0 commit comments

Comments (0)