|
37 | 37 | #include "lbann/layers/misc/distconv/distconv_channelwise_softmax.hpp" |
38 | 38 | #endif |
39 | 39 |
|
40 | | - |
41 | 40 | namespace lbann { |
42 | 41 |
|
43 | 42 | #ifdef LBANN_HAS_DISTCONV |
| 43 | +namespace dc { |
| 44 | +template <typename TensorDataType> |
| 45 | +using ChannelwiseSoftmax = |
| 46 | + ::distconv::ChannelwiseSoftmax<Backend, TensorDataType>; |
| 47 | +} // namespace dc |
| 48 | + |
44 | 49 | template <typename TensorDataType, data_layout Layout, El::Device Device> |
45 | 50 | class channelwise_softmax_distconv_adapter |
46 | | - : public data_type_distconv_adapter<TensorDataType>{ |
47 | | - public: |
48 | | - using TensorDevType = typename data_type_distconv_adapter<TensorDataType>::TensorDevType; |
49 | | - |
50 | | - channelwise_softmax_distconv_adapter(Layer& layer) |
51 | | - : data_type_distconv_adapter<TensorDataType>(layer){} |
52 | | - |
53 | | - virtual ~channelwise_softmax_distconv_adapter() = default; |
54 | | - void setup_distributions(tensor_overlap_constraints &constraints) override; |
55 | | - void setup_layer(size_t workspace_capacity) override; |
56 | | - void fp_compute(); |
57 | | - void bp_compute(); |
58 | | - std::unique_ptr<dc::ChannelwiseSoftmax<TensorDataType>> m_channelwise_softmax_operator; |
59 | | - }; // class definition channelwise_softmax_distconv_adapter |
60 | | - |
61 | | -#endif // LBANN_HAS_DISTCONV |
| 51 | + : public data_type_distconv_adapter<TensorDataType> |
| 52 | +{ |
| 53 | +public: |
| 54 | + using TensorDevType = |
| 55 | + typename data_type_distconv_adapter<TensorDataType>::TensorDevType; |
| 56 | + |
| 57 | + channelwise_softmax_distconv_adapter(Layer& layer) |
| 58 | + : data_type_distconv_adapter<TensorDataType>(layer) |
| 59 | + {} |
| 60 | + |
| 61 | + virtual ~channelwise_softmax_distconv_adapter() = default; |
| 62 | + void setup_distributions(tensor_overlap_constraints& constraints) override; |
| 63 | + void setup_layer(size_t workspace_capacity) override; |
| 64 | + void fp_compute(); |
| 65 | + void bp_compute(); |
| 66 | + std::unique_ptr<dc::ChannelwiseSoftmax<TensorDataType>> |
| 67 | + m_channelwise_softmax_operator; |
| 68 | +}; // class definition channelwise_softmax_distconv_adapter |
62 | 69 |
|
| 70 | +#endif // LBANN_HAS_DISTCONV |
63 | 71 |
|
64 | 72 | /** @brief Apply softmax to tensor channels. |
65 | 73 | * |
@@ -121,14 +129,29 @@ class channelwise_softmax_layer : public data_type_layer<TensorDataType> |
121 | 129 | void bp_compute() override; |
122 | 130 |
|
123 | 131 | #ifdef LBANN_HAS_DISTCONV |
124 | | - friend class channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>; |
125 | | - protected: |
126 | | - void setup_distconv_adapter(const DataReaderMetaData& dr_metadata) override; |
127 | | - bool is_distconv_supported() const override; |
128 | | - channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>& get_distconv_adapter() override; |
129 | | - const channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>& get_distconv_adapter() const override; |
| 132 | + friend class channelwise_softmax_distconv_adapter<TensorDataType, |
| 133 | + Layout, |
| 134 | + Device>; |
| 135 | + |
| 136 | +protected: |
| 137 | + void setup_distconv_adapter() override; |
| 138 | + bool is_distconv_supported() const override; |
| 139 | + channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>& |
| 140 | + get_distconv_adapter() override; |
| 141 | + const channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>& |
| 142 | + get_distconv_adapter() const override; |
130 | 143 | #endif // LBANN_HAS_DISTCONV |
| 144 | +private: |
| 145 | + void get_channel_size_and_stride(El::Int& channel_size, |
| 146 | + El::Int& channel_stride, |
| 147 | + El::Int& num_channels) const; |
| 148 | + |
| 149 | + /** Specifies the dimension of the tensor to perform softmax on. */ |
| 150 | + int64_t m_dim; |
131 | 151 |
|
| 152 | + /** @brief If true, only performs softmax on the chosen dimension. Otherwise |
| 153 | + all dimensions but ``m_dim`` will be used. */ |
| 154 | + bool m_single_dim_mode; |
132 | 155 | }; |
133 | 156 |
|
134 | 157 | // Builder function |
@@ -184,127 +207,114 @@ El::Device channelwise_softmax_layer<TensorDataType, Layout, Device>:: |
184 | 207 | return Device; |
185 | 208 | } |
186 | 209 |
|
187 | | -template <typename TensorDataType, data_layout Layout, El::Device Device> |
188 | | -void channelwise_softmax_layer<TensorDataType,Layout,Device>::setup_dims(DataReaderMetaData& dr_metadata) { |
189 | | - data_type_layer<TensorDataType>::setup_dims(dr_metadata); |
190 | | - this->set_output_dims(this->get_input_dims()); |
191 | | - |
192 | | - #ifdef LBANN_HAS_DISTCONV |
193 | | - |
194 | | - if (this->distconv_enabled()){ |
195 | | - // Additional checks when distconv mode is enabled |
196 | | - const auto& input_dims = this->get_input_dims(); |
197 | | - const auto& output_dims = this->get_output_dims(); |
198 | | - |
199 | | - if (input_dims.size() != 3 || output_dims.size() != 3){ |
200 | | - LBANN_ERROR(this->get_type()," layer \"",this->get_name(),"\" ", |
201 | | - "expects an input and output tensor with 3 dimensions (channel, *, *), " |
202 | | - "but it has been configured as a ", |
203 | | - input_dims.size(), "-D input tensor and ", |
204 | | - output_dims.size(),"-D output tensor"); |
205 | | - } |
206 | | - } |
207 | | - #endif // LBANN_HAS_DISTCONV |
208 | | -} |
209 | | - |
210 | 210 | #ifdef LBANN_HAS_DISTCONV |
211 | 211 |
|
212 | 212 | // ========================================================= |
213 | 213 | // DistConv-Adapter member functions |
214 | 214 | // ========================================================= |
215 | 215 | template <typename TensorDataType, data_layout Layout, El::Device Device> |
216 | | -void |
217 | | -channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device> |
218 | | -::setup_distributions(tensor_overlap_constraints &constraints){ |
| 216 | +void channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>:: |
| 217 | + setup_distributions(tensor_overlap_constraints& constraints) |
| 218 | +{ |
219 | 219 | data_type_distconv_adapter<TensorDataType>::setup_distributions(constraints); |
220 | 220 |
|
221 | | - for (auto &d: this->m_prev_activations_dists) { |
| 221 | + for (auto& d : this->m_prev_activations_dists) { |
222 | 222 | d.clear_overlap(); |
223 | 223 | constraints.mark_updated(d); |
224 | 224 | constraints.mark_invariant(d); |
225 | 225 | } |
226 | | - for (auto &d: this->m_activations_dists) { |
| 226 | + for (auto& d : this->m_activations_dists) { |
227 | 227 | d.clear_overlap(); |
228 | 228 | constraints.mark_updated(d); |
229 | 229 | constraints.mark_invariant(d); |
230 | 230 | } |
231 | | - for (auto &d: this->m_prev_error_signals_dists) { |
| 231 | + for (auto& d : this->m_prev_error_signals_dists) { |
232 | 232 | d.clear_overlap(); |
233 | 233 | constraints.mark_updated(d); |
234 | 234 | constraints.mark_invariant(d); |
235 | 235 | } |
236 | | - for (auto &d: this->m_error_signals_dists) { |
| 236 | + for (auto& d : this->m_error_signals_dists) { |
237 | 237 | d.clear_overlap(); |
238 | 238 | constraints.mark_updated(d); |
239 | 239 | constraints.mark_invariant(d); |
240 | 240 | } |
241 | 241 | } |
242 | 242 |
|
243 | 243 | template <typename TensorDataType, data_layout Layout, El::Device Device> |
244 | | -void |
245 | | -channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device> |
246 | | -::setup_layer(size_t workspace_capacity){ |
| 244 | +void channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>:: |
| 245 | + setup_layer(size_t workspace_capacity) |
| 246 | +{ |
247 | 247 | data_type_distconv_adapter<TensorDataType>::setup_layer(workspace_capacity); |
248 | 248 |
|
249 | | - m_channelwise_softmax_operator = std::make_unique<dc::ChannelwiseSoftmax<TensorDataType>>(dc::get_backend()); |
| 249 | + m_channelwise_softmax_operator = |
| 250 | + std::make_unique<dc::ChannelwiseSoftmax<TensorDataType>>(dc::get_backend()); |
250 | 251 | } |
251 | 252 |
|
252 | 253 | template <typename TensorDataType, data_layout Layout, El::Device Device> |
253 | | -void |
254 | | -channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device> |
255 | | -::fp_compute(){ |
256 | | - auto &layer = dynamic_cast< |
257 | | - channelwise_softmax_layer<TensorDataType, Layout, Device>&>(this->layer()); |
| 254 | +void channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>:: |
| 255 | + fp_compute() |
| 256 | +{ |
| 257 | + auto& layer = |
| 258 | + dynamic_cast<channelwise_softmax_layer<TensorDataType, Layout, Device>&>( |
| 259 | + this->layer()); |
258 | 260 | m_channelwise_softmax_operator->forward(this->get_prev_activations(0), |
259 | 261 | this->get_activations(0)); |
260 | 262 | } |
261 | 263 |
|
262 | 264 | template <typename TensorDataType, data_layout Layout, El::Device Device> |
263 | | -void |
264 | | -channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device> |
265 | | -::bp_compute(){ |
266 | | - auto &layer = dynamic_cast< |
267 | | - channelwise_softmax_layer<TensorDataType, Layout, Device>&>(this->layer()); |
268 | | - m_channelwise_softmax_operator->backward(this->get_activations(0), |
269 | | - this->get_prev_error_signals(), |
270 | | - this->get_error_signals(0)); |
| 265 | +void channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>:: |
| 266 | + bp_compute() |
| 267 | +{ |
| 268 | + auto& layer = |
| 269 | + dynamic_cast<channelwise_softmax_layer<TensorDataType, Layout, Device>&>( |
| 270 | + this->layer()); |
| 271 | + m_channelwise_softmax_operator->backward(this->get_activations(0), |
| 272 | + this->get_prev_error_signals(), |
| 273 | + this->get_error_signals(0)); |
271 | 274 | } |
272 | 275 | // ============================================================= |
273 | 276 | // DistConv-enabled Channelwise-Softmax member functions |
274 | 277 | // ============================================================= |
275 | 278 |
|
276 | 279 | template <typename TensorDataType, data_layout Layout, El::Device Device> |
277 | | -bool |
278 | | -channelwise_softmax_layer<TensorDataType, Layout, Device> |
279 | | -::is_distconv_supported() const { |
280 | | - return Device==El::Device::GPU && Layout == data_layout::DATA_PARALLEL; |
| 280 | +bool channelwise_softmax_layer<TensorDataType, Layout, Device>:: |
| 281 | + is_distconv_supported() const |
| 282 | +{ |
| 283 | + return Device == El::Device::GPU && Layout == data_layout::DATA_PARALLEL; |
281 | 284 | } |
282 | 285 |
|
283 | 286 | template <typename TensorDataType, data_layout Layout, El::Device Device> |
284 | | -void |
285 | | -channelwise_softmax_layer<TensorDataType, Layout, Device> |
286 | | -::setup_distconv_adapter(const DataReaderMetaData& dr_metadata){ |
287 | | - this->get_distconv_adapter_ptr() = std::make_unique<channelwise_softmax_distconv_adapter< |
288 | | - TensorDataType, Layout, Device>>(*this); |
| 287 | +void channelwise_softmax_layer<TensorDataType, Layout, Device>:: |
| 288 | + setup_distconv_adapter() |
| 289 | +{ |
| 290 | + this->get_distconv_adapter_ptr() = std::make_unique< |
| 291 | + channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>>( |
| 292 | + *this); |
289 | 293 | } |
290 | 294 |
|
291 | 295 | template <typename TensorDataType, data_layout Layout, El::Device Device> |
292 | 296 | const channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>& |
293 | | -channelwise_softmax_layer<TensorDataType, Layout, Device> |
294 | | -::get_distconv_adapter() const{ |
295 | | - return dynamic_cast<const channelwise_softmax_distconv_adapter< |
296 | | - TensorDataType, Layout, Device>&>(data_type_layer<TensorDataType>::get_distconv_adapter()); |
| 297 | +channelwise_softmax_layer<TensorDataType, Layout, Device>:: |
| 298 | + get_distconv_adapter() const |
| 299 | +{ |
| 300 | + return dynamic_cast<const channelwise_softmax_distconv_adapter<TensorDataType, |
| 301 | + Layout, |
| 302 | + Device>&>( |
| 303 | + data_type_layer<TensorDataType>::get_distconv_adapter()); |
297 | 304 | } |
298 | 305 |
|
299 | 306 | template <typename TensorDataType, data_layout Layout, El::Device Device> |
300 | 307 | channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>& |
301 | | -channelwise_softmax_layer<TensorDataType, Layout, Device> |
302 | | -::get_distconv_adapter(){ |
303 | | - return const_cast<channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>&>( |
304 | | - static_cast<const channelwise_softmax_layer<TensorDataType, Layout, Device>&>(*this).get_distconv_adapter()); |
| 308 | +channelwise_softmax_layer<TensorDataType, Layout, Device>:: |
| 309 | + get_distconv_adapter() |
| 310 | +{ |
| 311 | + return const_cast< |
| 312 | + channelwise_softmax_distconv_adapter<TensorDataType, Layout, Device>&>( |
| 313 | + static_cast< |
| 314 | + const channelwise_softmax_layer<TensorDataType, Layout, Device>&>(*this) |
| 315 | + .get_distconv_adapter()); |
305 | 316 | } |
306 | 317 |
|
307 | | - |
308 | 318 | #endif // LBANN_HAS_DISTCONV |
309 | 319 |
|
310 | 320 | #ifndef LBANN_CHANNELWISE_SOFTMAX_LAYER_INSTANTIATE |
|
0 commit comments