Skip to content

Anderson2021 autoscheduler fails with: Condition failed: at_or_inside_block() #8256

Open
@jansel

Description

@jansel

This code is a cleaned-up lowering of part of torch.argmax(torch.adaptive_avg_pool1d(...)).

repro.py

import halide as hl


@hl.generator(name="kernel")
class Kernel:
    """Halide generator that reproduces an Anderson2021 autoscheduler failure.

    The pipeline is a cleaned-up lowering of part of
    ``torch.argmax(torch.adaptive_avg_pool1d(...))``. For every ``rindex`` it
    forms a masked average of two adjacent input samples, then stores the
    argmax of those averages over a 32-element reduction domain.
    """

    # One-dimensional float64 input buffer.
    in_ptr0 = hl.InputBuffer(hl.Float(64), 1)
    # One-dimensional int64 output buffer that receives the argmax index.
    out_ptr0 = hl.OutputBuffer(hl.Int(64), 1)

    def generate(g):
        """Define the pipeline and supply the estimates the autoscheduler needs.

        No manual schedule is given; the method asserts that an autoscheduler
        is in use and only sets buffer estimates.
        """
        in_ptr0 = g.in_ptr0
        out_ptr0 = g.out_ptr0
        rindex = hl.Var("rindex")
        # Split the flat index into a low part (0 or 1) and a high part.
        r0 = rindex % 2
        r1 = rindex // 2
        # Reduction domain used by the final argmax.
        rdom = hl.RDom([hl.Range(0, 32)])
        # tmp3 / tmp4: two offsets derived from r0; tmp4 is always tmp3 + 2.
        tmp3 = hl.Func("tmp3")
        tmp3[rindex] = (3 * r0) // 2
        tmp4 = hl.Func("tmp4")
        tmp4[rindex] = 2 + ((3 * r0) // 2)
        # tmp5: mask for the first sample (algebraically true for every rindex).
        tmp5 = hl.Func("tmp5")
        tmp5[rindex] = tmp3[rindex] < tmp4[rindex]
        # tmp7: first input sample, read through a constant-exterior (0)
        # boundary condition so out-of-range indices do not touch the buffer.
        tmp7 = hl.Func("tmp7")
        tmp7[rindex] = hl.BoundaryConditions.constant_exterior(in_ptr0, 0)[
            (3 * r1) + ((3 * r0) // 2)
        ]
        # tmp9: first sample where the mask holds, 0.0 otherwise.
        tmp9 = hl.Func("tmp9")
        tmp9[rindex] = hl.select(tmp5[rindex], tmp7[rindex], hl.f64(0.0))
        # tmp10 / tmp11: offset and mask for the second sample (tmp10 is
        # tmp4 - 1, so this mask is also algebraically true).
        tmp10 = hl.Func("tmp10")
        tmp10[rindex] = 1 + ((3 * r0) // 2)
        tmp11 = hl.Func("tmp11")
        tmp11[rindex] = tmp10[rindex] < tmp4[rindex]
        # tmp13: second input sample, one element after the one read by tmp7.
        tmp13 = hl.Func("tmp13")
        tmp13[rindex] = hl.BoundaryConditions.constant_exterior(in_ptr0, 0)[
            1 + (3 * r1) + ((3 * r0) // 2)
        ]
        # tmp15: second sample where its mask holds, 0.0 otherwise.
        tmp15 = hl.Func("tmp15")
        tmp15[rindex] = hl.select(tmp11[rindex], tmp13[rindex], hl.f64(0.0))
        # tmp16: sum of the two masked samples.
        tmp16 = hl.Func("tmp16")
        tmp16[rindex] = tmp15[rindex] + tmp9[rindex]
        # tmp19 / tmp20: each mask as 1.0 or 0.0; tmp21 is their sum, i.e. the
        # number of samples that contributed to tmp16.
        tmp19 = hl.Func("tmp19")
        tmp19[rindex] = hl.select(tmp5[rindex], hl.f64(1.0), hl.f64(0.0))
        tmp20 = hl.Func("tmp20")
        tmp20[rindex] = hl.select(tmp11[rindex], hl.f64(1.0), hl.f64(0.0))
        tmp21 = hl.Func("tmp21")
        tmp21[rindex] = tmp20[rindex] + tmp19[rindex]
        # tmp22: average = masked sum / contributing-sample count.
        tmp22 = hl.Func("tmp22")
        tmp22[rindex] = tmp16[rindex] / tmp21[rindex]
        # Reduce tmp22 over rdom and keep element 0 of the argmax result
        # (presumably the index rather than the maximum value; confirm against
        # the Halide argmax documentation).
        tmp23 = hl.argmax(rdom, tmp22[rdom])[0]
        # The output is indexed by an anonymous Var; the stored value does not
        # depend on that Var.
        out_ptr0[hl.Var()] = hl.cast(out_ptr0.type(), tmp23)

        # No manual schedule exists, so an autoscheduler must be selected.
        assert g.using_autoscheduler()
        in_ptr0.set_estimates([hl.Range(0, 48)])
        # The real range here is 1; it is set to 2 to work around
        # https://github.com/halide/Halide/issues/8246
        out_ptr0.set_estimates([hl.Range(0, 2)])


if __name__ == "__main__":
    import sys
    import tempfile

    # Drive the Halide generator entry point as if it had been launched from
    # the command line; build artefacts go to a throwaway directory.
    with tempfile.TemporaryDirectory() as output_dir:
        # What to generate and which outputs to emit.
        generator_flags = [
            "-g", "kernel",
            "-o", output_dir,
            "-f", "halide_kernel",
            "-e", "static_library,h,schedule",
        ]
        # Machine-specific path to the Anderson2021 autoscheduler plugin.
        plugin_flags = [
            "-p", "/home/jansel/conda/envs/pytorch/lib/libautoschedule_anderson2021.so",
        ]
        # Generator parameters: build target and autoscheduler selection.
        generator_params = [
            "target=host-cuda-cuda_capability_86-user_context-strict_float-no_asserts",
            "autoscheduler=Anderson2021",
            "autoscheduler.parallelism=82",
        ]
        sys.argv = ["repro.py", *generator_flags, *plugin_flags, *generator_params]
        hl.main()

(you will need to update the path to libautoschedule_anderson2021.so)

Output:

Unhandled exception: Internal Error at /home/jansel/Halide/src/autoschedulers/anderson2021/GPULoopInfo.cpp:92 triggered by user code at : Condition failed: at_or_inside_block(): 

Traceback (most recent call last):
  File "/home/jansel/pytorch/repro.py", line 72, in <module>
    hl.main()
RuntimeError: Generator failed: -1

The code includes a workaround for #8246: it sets the output size estimate to 2 (when it is actually 1). If I remove that workaround, I get the same error as #8246. I think the workaround is uncovering a different issue, but the two issues are possibly related.

Metadata

Metadata

Assignees

Labels

autoscheduler: Related to one or more of the Autoschedulers

Type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions