Skip to content

loop-unroll failure in Autoscheduler #4271

Open
@steven-johnson

Description

@steven-johnson

Using the simple Blur generator below, run it through the single-shot autoscheduler with the default weights. The result is a failure with:

Can only unroll for loops over a constant extent.
Loop over repeat_edge.s0._0._0 has extent (((min((((select((0 < blurred_y.s0.x.x), (max(min((blurred_y.extent.0 - (blurred_y.s0.x.x*128)), 128), 0) + 31), 159)/32)*32) + (select((0 < blurred_y.s0.x.x), min((blurred_y.s0.x.x*128), blurred_y.extent.0), min((blurred_y.s0.x.x*128), (blurred_y.extent.0 + -128))) + blurred_y.min.0)), ((min((blurred_y.s0.x.x*128), (blurred_y.extent.0 + -128)) + blurred_y.min.0) + 128)) - (min(select((0 < blurred_y.s0.x.x), min((blurred_y.s0.x.x*128), blurred_y.extent.0), min((blurred_y.s0.x.x*128), (blurred_y.extent.0 + -128))), (min((blurred_y.s0.x.x*128), (blurred_y.extent.0 + -128)) + 96)) + blurred_y.min.0)) + 39)/32).

Source:


namespace {

using namespace Halide;
using namespace Halide::BoundaryConditions;
using namespace Halide::ConciseCasts;

// Image extents used for the size estimates and, in the manual schedule,
// for the hard input bounds.
constexpr int kWidth = 480;
constexpr int kHeight = 640;

// Gaussian kernel shape: kGamma is the divisor in exp(-r^2 / (2 * kGamma^2)),
// and the kernel covers taps -kRadius..+kRadius (kDiameter taps in total).
constexpr float kGamma = 1.45f;
constexpr int kRadius = 4;
constexpr int kDiameter = 2 * kRadius + 1;

// Fixed-point scale of the integer kernel weights: weights are scaled by
// kRange (= 2^kRangeBits) and results are divided by the same power of two.
constexpr int kRangeBits = 8;
constexpr uint32_t kRange = uint32_t{1} << kRangeBits;

// Loop variables used by the Func definitions in Blur::generate().
Var x("x");
Var y("y");

// Returns the unnormalized Gaussian weight exp(-r^2 / (2 * kGamma^2)) for the
// kernel tap at integer offset `r` from the centre.
inline float GaussianCoefficient(int r) {
    const float half_neg_r_squared = -0.5f * r * r;
    const float gamma_squared = kGamma * kGamma;
    return std::exp(half_neg_r_squared / gamma_squared);
}

// Halide generator for a separable Gaussian blur over a 2-D int8 image.
//
// The blur is a horizontal pass (blurred_x) followed by a vertical pass
// (blurred_y), each using the same kDiameter-tap kernel. Kernel weights are
// int16 fixed-point values scaled by kRange; each pass accumulates in int16
// and divides by 2^kRangeBits before narrowing back to int8.
class Blur : public Halide::Generator<Blur> {
public:
  // 2-D int8 source image and 2-D int8 blurred result.
  Input<Buffer<int8_t>> input_{"input", 2};
  Output<Buffer<int8_t>> output_{"output", 2};

  // Builds the pipeline, sets size estimates and, when auto-scheduling is
  // off, applies a simple manual schedule.
  void generate() {
    // The horizontal taps read x - kRadius .. x + kRadius, so the input is
    // wrapped in a repeat_edge boundary condition before being sampled.
    Func input_bounded = repeat_edge(input_);

    // `coefficients` points at the centre of the storage so it can be indexed
    // directly with tap offsets in [-kRadius, kRadius].
    int16_t coefficients_data[kDiameter];
    int16_t* coefficients = &coefficients_data[kRadius];
    // Sum of the raw Gaussian weights, used to normalise the kernel.
    double sum = 0.0;
    for (int rx = -kRadius; rx <= kRadius; rx++) {
      sum += GaussianCoefficient(rx);
    }
    // Scale so the weights sum to (approximately) kRange in fixed point.
    const double scale = kRange / sum;
    for (int rx = -kRadius; rx <= kRadius; rx++) {
      // Implicit double -> int16_t conversion drops the fractional part, so
      // the integer weights can sum to slightly less than kRange.
      coefficients[rx] = GaussianCoefficient(rx) * scale;
    }

    // Horizontal pass: weighted sum of the kDiameter neighbours along x,
    // accumulated as a 16-bit expression.
    // NOTE(review): relies on the integer weights summing to at most kRange
    // so the int16 accumulator cannot overflow for int8 input - confirm.
    Expr gx = i16(0);
    for (int rx = -kRadius; rx <= kRadius; rx++) {
      gx += i16(input_bounded(x + rx, y)) * Expr(coefficients[rx]);
    }
    Func blurred_x("blurred_x");
    // Remove the fixed-point scale and narrow back to int8.
    blurred_x(x, y) = i8(gx / (1 << kRangeBits));

    // Vertical pass: same kernel applied along y to the horizontal result.
    // blurred_x is sampled at y - kRadius .. y + kRadius with no boundary
    // condition of its own.
    Expr gy = i16(0);
    for (int ry = -kRadius; ry <= kRadius; ry++) {
      gy += i16(blurred_x(x, y + ry)) * Expr(coefficients[ry]);
    }
    Func blurred_y("blurred_y");
    blurred_y(x, y) = i8(gy / (1 << kRangeBits));

    output_ = blurred_y;

    // Size estimates (min, extent per dimension) for input and output.
    // NOTE(review): presumably these are the hints the autoscheduler reads.
    {
      input_.set_estimates({{0, kWidth}, {0, kHeight}});
      output_.set_estimates({{0, kWidth}, {0, kHeight}});
    }

    // Manual schedule, used only when auto-scheduling is disabled.
    // NOTE(review): `auto_schedule` is not declared in this class; presumably
    // it is provided by the Halide::Generator base - confirm for the Halide
    // version in use.
    if (!auto_schedule) {
      // Pin the input to exactly kWidth x kHeight with element stride 1 in x
      // and row stride kWidth in y.
      input_.dim(0).set_bounds(0, kWidth).set_stride(1);
      input_.dim(1).set_bounds(0, kHeight).set_stride(kWidth);
      // Compute the intermediate horizontal pass and the output at root.
      blurred_x.compute_root();
      output_.compute_root();
    }
  }
};

}  // namespace

Metadata

Metadata

Assignees

Labels

autoscheduler: Related to one or more of the Autoschedulers

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions