// aten/src/ATen/native/UpSampleTrilinear3d.cpp

// Adapted from interp.cpp from Caffe util by Pauline Luc
// Originally developed by George Papandreou

#include <ATen/ATen.h>
#include <ATen/NativeFunctions.h>
#include <ATen/native/UpSample.h>

namespace at {
namespace native {
namespace {

// Trilinear resampling over raw buffers.
//
// `idata` is read as `nbatch * channels` back-to-back volumes of
// input_depth * input_height * input_width elements; `odata` is written as
// the same number of volumes of output_depth * output_height * output_width
// elements. Each output element is assigned exactly once. `align_corners` is
// only forwarded to the linear_upsample_compute_* helpers.
template <typename scalar_t>
static void upsample_trilinear3d_out_frame(
    scalar_t* odata,
    scalar_t* idata,
    int64_t input_depth,
    int64_t input_height,
    int64_t input_width,
    int64_t output_depth,
    int64_t output_height,
    int64_t output_width,
    int64_t nbatch,
    int64_t channels,
    bool align_corners) {
  // Batch and channel are folded into a single loop over whole volumes.
  const int64_t num_volumes = channels * nbatch;

  // Element strides: one depth slice and one full volume, per buffer.
  const int64_t in_slice = input_height * input_width;
  const int64_t out_slice = output_height * output_width;
  const int64_t in_volume = input_width * input_height * input_depth;
  const int64_t out_volume = output_width * output_height * output_depth;

  // Identical input and output extents: plain element-wise copy.
  const bool same_size = input_depth == output_depth &&
      input_height == output_height && input_width == output_width;
  if (same_size) {
    for (int64_t d = 0; d < output_depth; ++d) {
      for (int64_t h = 0; h < output_height; ++h) {
        for (int64_t w = 0; w < output_width; ++w) {
          const scalar_t* src = idata + (d * in_slice + h * input_width + w);
          scalar_t* dst = odata + (d * out_slice + h * output_width + w);

          for (int64_t v = 0; v < num_volumes; ++v) {
            *dst = *src;
            src += in_volume;
            dst += out_volume;
          }
        }
      }
    }
    return;
  }

  const scalar_t depth_scale = linear_upsample_compute_scale<scalar_t>(
      input_depth, output_depth, align_corners);
  const scalar_t height_scale = linear_upsample_compute_scale<scalar_t>(
      input_height, output_height, align_corners);
  const scalar_t width_scale = linear_upsample_compute_scale<scalar_t>(
      input_width, output_width, align_corners);

  for (int64_t od = 0; od < output_depth; ++od) {
    const scalar_t src_d = linear_upsample_compute_source_index<scalar_t>(
        depth_scale, od, align_corners);

    // Integer part selects the lower neighbour; the remainder is the weight
    // of the upper neighbour.
    const int64_t d0 = src_d;
    // Offset to the upper depth neighbour; 0 when d0 is not below the last
    // input slice, so both samples then come from the same slice.
    const int64_t d_step = (d0 < input_depth - 1) ? in_slice : 0;
    const scalar_t d_hi = src_d - d0;
    const scalar_t d_lo = static_cast<scalar_t>(1.) - d_hi;

    for (int64_t oh = 0; oh < output_height; ++oh) {
      const scalar_t src_h = linear_upsample_compute_source_index<scalar_t>(
          height_scale, oh, align_corners);

      const int64_t h0 = src_h;
      // Offset to the next input row, or 0 at the last row.
      const int64_t h_step = (h0 < input_height - 1) ? input_width : 0;
      const scalar_t h_hi = src_h - h0;
      const scalar_t h_lo = static_cast<scalar_t>(1.) - h_hi;

      for (int64_t ow = 0; ow < output_width; ++ow) {
        const scalar_t src_w = linear_upsample_compute_source_index<scalar_t>(
            width_scale, ow, align_corners);

        const int64_t w0 = src_w;
        // Offset to the next input column, or 0 at the last column.
        const int64_t w_step = (w0 < input_width - 1) ? 1 : 0;
        const scalar_t w_hi = src_w - w0;
        const scalar_t w_lo = static_cast<scalar_t>(1.) - w_hi;

        const scalar_t* src = idata + (d0 * in_slice + h0 * input_width + w0);
        scalar_t* dst = odata + (od * out_slice + oh * output_width + ow);

        for (int64_t v = 0; v < num_volumes; ++v) {
          // Weighted sum of the eight surrounding input samples. The
          // grouping of the arithmetic is kept as-is so that rounding is
          // unchanged.
          dst[0] = d_lo *
                  (h_lo * (w_lo * src[0] + w_hi * src[w_step]) +
                   h_hi *
                       (w_lo * src[h_step] +
                        w_hi * src[h_step + w_step])) +
              d_hi *
                  (h_lo *
                       (w_lo * src[d_step] + w_hi * src[d_step + w_step]) +
                   h_hi *
                       (w_lo * src[d_step + h_step] +
                        w_hi * src[d_step + h_step + w_step]));
          src += in_volume;
          dst += out_volume;
        }
      }
    }
  }
}

// Backward counterpart of the trilinear resampling kernel.
//
// `odata` is only read and `idata` is only accumulated into (with `+=`), so
// the caller must initialise `idata` beforehand. Both buffers are treated as
// `nbatch * channels` back-to-back volumes: `idata` with the input extents
// and `odata` with the output extents. `align_corners` is only forwarded to
// the linear_upsample_compute_* helpers.
template <typename scalar_t>
static void upsample_trilinear3d_backward_out_frame(
    scalar_t* odata,
    scalar_t* idata,
    int64_t input_depth,
    int64_t input_height,
    int64_t input_width,
    int64_t output_depth,
    int64_t output_height,
    int64_t output_width,
    int64_t nbatch,
    int64_t channels,
    bool align_corners) {
  // Batch and channel are folded into a single loop over whole volumes.
  const int64_t num_volumes = channels * nbatch;

  // Element strides: one depth slice and one full volume, per buffer.
  const int64_t in_slice = input_height * input_width;
  const int64_t out_slice = output_height * output_width;
  const int64_t in_volume = input_width * input_height * input_depth;
  const int64_t out_volume = output_width * output_height * output_depth;

  // Identical extents: every element of `odata` is added to the element of
  // `idata` at the same position.
  const bool same_size = input_depth == output_depth &&
      input_height == output_height && input_width == output_width;
  if (same_size) {
    for (int64_t d = 0; d < output_depth; ++d) {
      for (int64_t h = 0; h < output_height; ++h) {
        for (int64_t w = 0; w < output_width; ++w) {
          scalar_t* acc = idata + (d * in_slice + h * input_width + w);
          const scalar_t* src =
              odata + (d * out_slice + h * output_width + w);

          for (int64_t v = 0; v < num_volumes; ++v) {
            acc[0] += src[0];
            acc += in_volume;
            src += out_volume;
          }
        }
      }
    }
    return;
  }

  const scalar_t depth_scale = linear_upsample_compute_scale<scalar_t>(
      input_depth, output_depth, align_corners);
  const scalar_t height_scale = linear_upsample_compute_scale<scalar_t>(
      input_height, output_height, align_corners);
  const scalar_t width_scale = linear_upsample_compute_scale<scalar_t>(
      input_width, output_width, align_corners);

  for (int64_t od = 0; od < output_depth; ++od) {
    const scalar_t src_d = linear_upsample_compute_source_index<scalar_t>(
        depth_scale, od, align_corners);
    const int64_t d0 = src_d;
    // Offset to the upper depth neighbour; 0 when d0 is not below the last
    // input slice.
    const int64_t d_step = (d0 < input_depth - 1) ? in_slice : 0;
    const scalar_t d_hi = src_d - d0;
    const scalar_t d_lo = static_cast<scalar_t>(1.) - d_hi;

    for (int64_t oh = 0; oh < output_height; ++oh) {
      const scalar_t src_h = linear_upsample_compute_source_index<scalar_t>(
          height_scale, oh, align_corners);
      const int64_t h0 = src_h;
      // Offset to the next input row, or 0 at the last row.
      const int64_t h_step = (h0 < input_height - 1) ? input_width : 0;
      const scalar_t h_hi = src_h - h0;
      const scalar_t h_lo = static_cast<scalar_t>(1.) - h_hi;

      for (int64_t ow = 0; ow < output_width; ++ow) {
        const scalar_t src_w = linear_upsample_compute_source_index<scalar_t>(
            width_scale, ow, align_corners);
        const int64_t w0 = src_w;
        // Offset to the next input column, or 0 at the last column.
        const int64_t w_step = (w0 < input_width - 1) ? 1 : 0;
        const scalar_t w_hi = src_w - w0;
        const scalar_t w_lo = static_cast<scalar_t>(1.) - w_hi;

        scalar_t* acc = idata + (d0 * in_slice + h0 * input_width + w0);
        const scalar_t* src =
            odata + (od * out_slice + oh * output_width + ow);

        for (int64_t v = 0; v < num_volumes; ++v) {
          // Distribute src[0] over the eight neighbouring cells of `idata`.
          // Some targets coincide when a step is 0, so the statement order
          // is kept fixed to leave the accumulation order unchanged.
          acc[0] += d_lo * h_lo * w_lo * src[0];
          acc[w_step] += d_lo * h_lo * w_hi * src[0];
          acc[h_step] += d_lo * h_hi * w_lo * src[0];
          acc[h_step + w_step] += d_lo * h_hi * w_hi * src[0];
          acc[d_step] += d_hi * h_lo * w_lo * src[0];
          acc[d_step + w_step] += d_hi * h_lo * w_hi * src[0];
          acc[d_step + h_step] += d_hi * h_hi * w_lo * src[0];
          acc[d_step + h_step + w_step] += d_hi * h_hi * w_hi * src[0];
          acc += in_volume;
          src += out_volume;
        }
      }
    }
  }
}

// Shared CPU implementation of the forward trilinear upsample.
//
// output:        destination tensor; resized here to
//                {nbatch, channels, output_depth, output_height, output_width}.
// input_:        source tensor; sizes 0..4 are read as
//                (nbatch, channels, depth, height, width).
// output_size:   must hold exactly 3 entries (depth, height, width).
// align_corners: forwarded unchanged to the interpolation kernel.
static void upsample_trilinear3d_out_cpu_template(
    Tensor& output,
    const Tensor& input_,
    IntArrayRef output_size,
    bool align_corners) {
  AT_CHECK(
      output_size.size() == 3,
      "It is expected output_size equals to 3, but got size ",
      output_size.size());

  int64_t output_depth = output_size[0];
  int64_t output_height = output_size[1];
  int64_t output_width = output_size[2];

  int64_t nbatch = input_.size(0);
  int64_t channels = input_.size(1);
  int64_t input_depth = input_.size(2);
  int64_t input_height = input_.size(3);
  int64_t input_width = input_.size(4);

  // Forward pass: only the input tensor is validated, so the grad_output
  // slot receives an undefined Tensor.
  upsample_3d_shape_check(
      input_,
      Tensor(),
      nbatch,
      channels,
      input_depth,
      input_height,
      input_width,
      output_depth,
      output_height,
      output_width);

  // The kernel indexes raw memory with dense strides, so work on a
  // contiguous view/copy of the input.
  auto input = input_.contiguous();

  // No zero fill is needed after the resize: the kernel assigns every output
  // element exactly once (both in its copy path and in its interpolation
  // path), so pre-clearing the buffer would only add a redundant pass over
  // the output memory.
  output.resize_({nbatch, channels, output_depth, output_height, output_width});

  AT_ASSERT(
      input_depth > 0 && input_height > 0 && input_width > 0 &&
      output_depth > 0 && output_height > 0 && output_width > 0);

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.scalar_type(), "upsample_trilinear3d", [&] {
        auto* idata = input.data<scalar_t>();
        auto* odata = output.data<scalar_t>();

        upsample_trilinear3d_out_frame<scalar_t>(
            odata,
            idata,
            input_depth,
            input_height,
            input_width,
            output_depth,
            output_height,
            output_width,
            nbatch,
            channels,
            align_corners);
      });
}

// Shared CPU implementation of the backward trilinear upsample.
//
// grad_input:    destination tensor; resized here to
//                {nbatch, channels, input_depth, input_height, input_width}
//                and zero-filled before the kernel accumulates into it.
// grad_output_:  incoming gradient.
// output_size:   must hold exactly 3 entries (depth, height, width).
// input_size:    must hold exactly 5 entries
//                (nbatch, channels, depth, height, width).
// align_corners: forwarded unchanged to the backward kernel.
static void upsample_trilinear3d_backward_out_cpu_template(
    Tensor& grad_input,
    const Tensor& grad_output_,
    IntArrayRef output_size,
    IntArrayRef input_size,
    bool align_corners) {
  AT_CHECK(
      output_size.size() == 3,
      "It is expected output_size equals to 3, but got size ",
      output_size.size());

  AT_CHECK(
      input_size.size() == 5,
      "It is expected input_size equals to 5, but got size ",
      input_size.size());

  // Spatial extents of the upsampled (gradient) side.
  const int64_t output_depth = output_size[0];
  const int64_t output_height = output_size[1];
  const int64_t output_width = output_size[2];

  // Full shape of the tensor the gradient is computed for.
  const int64_t nbatch = input_size[0];
  const int64_t channels = input_size[1];
  const int64_t input_depth = input_size[2];
  const int64_t input_height = input_size[3];
  const int64_t input_width = input_size[4];

  // Backward pass: only grad_output is validated, so the input slot
  // receives an undefined Tensor.
  upsample_3d_shape_check(
      Tensor(),
      grad_output_,
      nbatch,
      channels,
      input_depth,
      input_height,
      input_width,
      output_depth,
      output_height,
      output_width);

  // The kernel indexes raw memory with dense strides.
  Tensor grad_output_dense = grad_output_.contiguous();

  // The kernel only ever adds into grad_input, hence the explicit clear.
  grad_input.resize_(
      {nbatch, channels, input_depth, input_height, input_width});
  grad_input.zero_();

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      grad_output_dense.scalar_type(), "upsample_trilinear3d_backward", [&] {
        scalar_t* grad_in_ptr = grad_input.data<scalar_t>();
        scalar_t* grad_out_ptr = grad_output_dense.data<scalar_t>();

        // Argument order follows the kernel signature: the buffer that is
        // read (grad_output) first, the accumulated buffer second.
        upsample_trilinear3d_backward_out_frame<scalar_t>(
            grad_out_ptr,
            grad_in_ptr,
            input_depth,
            input_height,
            input_width,
            output_depth,
            output_height,
            output_width,
            nbatch,
            channels,
            align_corners);
      });
}
} // namespace

// Out-variant of the forward op: fills the caller-supplied `output` via the
// shared CPU template and returns a reference to that same tensor.
Tensor& upsample_trilinear3d_out_cpu(
    Tensor& output,
    const Tensor& input,
    IntArrayRef output_size,
    bool align_corners) {
  upsample_trilinear3d_out_cpu_template(
      output,
      input,
      output_size,
      align_corners);
  return output;
}

// Functional variant of the forward op: creates the result tensor itself
// (using the options of `input`) and returns it by value.
Tensor upsample_trilinear3d_cpu(
    const Tensor& input,
    IntArrayRef output_size,
    bool align_corners) {
  // Starts out with shape {0}; the template resizes it to the final shape.
  Tensor output = at::empty({0}, input.options());
  upsample_trilinear3d_out_cpu_template(
      output,
      input,
      output_size,
      align_corners);
  return output;
}

// Out-variant of the backward op: fills the caller-supplied `grad_input` via
// the shared CPU template and returns a reference to that same tensor.
Tensor& upsample_trilinear3d_backward_out_cpu(
    Tensor& grad_input,
    const Tensor& grad_output,
    IntArrayRef output_size,
    IntArrayRef input_size,
    bool align_corners) {
  upsample_trilinear3d_backward_out_cpu_template(
      grad_input,
      grad_output,
      output_size,
      input_size,
      align_corners);
  return grad_input;
}

// Functional variant of the backward op: creates the gradient tensor itself
// (using the options of `grad_output`) and returns it by value.
Tensor upsample_trilinear3d_backward_cpu(
    const Tensor& grad_output,
    IntArrayRef output_size,
    IntArrayRef input_size,
    bool align_corners) {
  // The template resizes grad_input to the shape given by input_size and
  // zero-fills it before accumulating, so pre-allocating a zeroed tensor
  // here would allocate and clear the same memory twice. Start from an empty
  // tensor instead, as the forward functional variant does.
  auto grad_input = at::empty({0}, grad_output.options());
  upsample_trilinear3d_backward_out_cpu_template(
      grad_input, grad_output, output_size, input_size, align_corners);
  return grad_input;
}

} // namespace native
} // namespace at