Skip to content

cranelift: Optimize select+icmp into {s,u}{min,max} #5546

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Feb 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 128 additions & 0 deletions cranelift/codegen/src/opts/algebraic.isle
Original file line number Diff line number Diff line change
Expand Up @@ -345,3 +345,131 @@
(uextend $I64 x @ (value_type $I32))
(iconst _ (u64_from_imm64 0))))
(iconst ty (imm64 1)))


;; Rewrite `select` whose condition is an integer comparison of the very same
;; two operands it chooses between into the matching {s,u}{min,max}.
;;
;; Here the select picks `lhs` when the comparison holds and `rhs` otherwise,
;; so a "greater" comparison yields a max and a "less" comparison yields a min.
;; The or-equal variants map to the same instruction because when the operands
;; are equal either choice produces the same value.

;; Signed comparisons.
(rule (simplify (select ty
                        (icmp _ (IntCC.SignedGreaterThan) lhs rhs)
                        lhs rhs))
      (smax ty lhs rhs))
(rule (simplify (select ty
                        (icmp _ (IntCC.SignedGreaterThanOrEqual) lhs rhs)
                        lhs rhs))
      (smax ty lhs rhs))
(rule (simplify (select ty
                        (icmp _ (IntCC.SignedLessThan) lhs rhs)
                        lhs rhs))
      (smin ty lhs rhs))
(rule (simplify (select ty
                        (icmp _ (IntCC.SignedLessThanOrEqual) lhs rhs)
                        lhs rhs))
      (smin ty lhs rhs))

;; Unsigned comparisons.
(rule (simplify (select ty
                        (icmp _ (IntCC.UnsignedGreaterThan) lhs rhs)
                        lhs rhs))
      (umax ty lhs rhs))
(rule (simplify (select ty
                        (icmp _ (IntCC.UnsignedGreaterThanOrEqual) lhs rhs)
                        lhs rhs))
      (umax ty lhs rhs))
(rule (simplify (select ty
                        (icmp _ (IntCC.UnsignedLessThan) lhs rhs)
                        lhs rhs))
      (umin ty lhs rhs))
(rule (simplify (select ty
                        (icmp _ (IntCC.UnsignedLessThanOrEqual) lhs rhs)
                        lhs rhs))
      (umin ty lhs rhs))


;; Same idea as the direct select-of-icmp rewrites, but for a `select` whose
;; value operands appear in the opposite order to the comparison operands:
;; the select picks `rhs` when the comparison holds and `lhs` otherwise.
;; A "less" comparison therefore yields a max and a "greater" one a min.

;; Signed comparisons.
(rule (simplify (select ty
                        (icmp _ (IntCC.SignedLessThan) lhs rhs)
                        rhs lhs))
      (smax ty lhs rhs))
(rule (simplify (select ty
                        (icmp _ (IntCC.SignedLessThanOrEqual) lhs rhs)
                        rhs lhs))
      (smax ty lhs rhs))
(rule (simplify (select ty
                        (icmp _ (IntCC.SignedGreaterThan) lhs rhs)
                        rhs lhs))
      (smin ty lhs rhs))
(rule (simplify (select ty
                        (icmp _ (IntCC.SignedGreaterThanOrEqual) lhs rhs)
                        rhs lhs))
      (smin ty lhs rhs))

;; Unsigned comparisons.
(rule (simplify (select ty
                        (icmp _ (IntCC.UnsignedLessThan) lhs rhs)
                        rhs lhs))
      (umax ty lhs rhs))
(rule (simplify (select ty
                        (icmp _ (IntCC.UnsignedLessThanOrEqual) lhs rhs)
                        rhs lhs))
      (umax ty lhs rhs))
(rule (simplify (select ty
                        (icmp _ (IntCC.UnsignedGreaterThan) lhs rhs)
                        rhs lhs))
      (umin ty lhs rhs))
(rule (simplify (select ty
                        (icmp _ (IntCC.UnsignedGreaterThanOrEqual) lhs rhs)
                        rhs lhs))
      (umin ty lhs rhs))

;; Rewrite `vselect` whose condition is an integer comparison of the very same
;; two operands it chooses between into the matching {s,u}{min,max}.
;;
;; The vselect picks `lhs` where the comparison holds and `rhs` otherwise, so
;; a "greater" comparison yields a max and a "less" comparison yields a min.

;; Signed comparisons.
(rule (simplify (vselect ty
                         (icmp _ (IntCC.SignedGreaterThan) lhs rhs)
                         lhs rhs))
      (smax ty lhs rhs))
(rule (simplify (vselect ty
                         (icmp _ (IntCC.SignedGreaterThanOrEqual) lhs rhs)
                         lhs rhs))
      (smax ty lhs rhs))
(rule (simplify (vselect ty
                         (icmp _ (IntCC.SignedLessThan) lhs rhs)
                         lhs rhs))
      (smin ty lhs rhs))
(rule (simplify (vselect ty
                         (icmp _ (IntCC.SignedLessThanOrEqual) lhs rhs)
                         lhs rhs))
      (smin ty lhs rhs))

;; Unsigned comparisons.
(rule (simplify (vselect ty
                         (icmp _ (IntCC.UnsignedGreaterThan) lhs rhs)
                         lhs rhs))
      (umax ty lhs rhs))
(rule (simplify (vselect ty
                         (icmp _ (IntCC.UnsignedGreaterThanOrEqual) lhs rhs)
                         lhs rhs))
      (umax ty lhs rhs))
(rule (simplify (vselect ty
                         (icmp _ (IntCC.UnsignedLessThan) lhs rhs)
                         lhs rhs))
      (umin ty lhs rhs))
(rule (simplify (vselect ty
                         (icmp _ (IntCC.UnsignedLessThanOrEqual) lhs rhs)
                         lhs rhs))
      (umin ty lhs rhs))

;; Same idea as the direct vselect-of-icmp rewrites, but for a `vselect` whose
;; value operands appear in the opposite order to the comparison operands:
;; the vselect picks `rhs` where the comparison holds and `lhs` otherwise.
;; A "less" comparison therefore yields a max and a "greater" one a min.

;; Signed comparisons.
(rule (simplify (vselect ty
                         (icmp _ (IntCC.SignedLessThan) lhs rhs)
                         rhs lhs))
      (smax ty lhs rhs))
(rule (simplify (vselect ty
                         (icmp _ (IntCC.SignedLessThanOrEqual) lhs rhs)
                         rhs lhs))
      (smax ty lhs rhs))
(rule (simplify (vselect ty
                         (icmp _ (IntCC.SignedGreaterThan) lhs rhs)
                         rhs lhs))
      (smin ty lhs rhs))
(rule (simplify (vselect ty
                         (icmp _ (IntCC.SignedGreaterThanOrEqual) lhs rhs)
                         rhs lhs))
      (smin ty lhs rhs))

;; Unsigned comparisons.
(rule (simplify (vselect ty
                         (icmp _ (IntCC.UnsignedLessThan) lhs rhs)
                         rhs lhs))
      (umax ty lhs rhs))
(rule (simplify (vselect ty
                         (icmp _ (IntCC.UnsignedLessThanOrEqual) lhs rhs)
                         rhs lhs))
      (umax ty lhs rhs))
(rule (simplify (vselect ty
                         (icmp _ (IntCC.UnsignedGreaterThan) lhs rhs)
                         rhs lhs))
      (umin ty lhs rhs))
(rule (simplify (vselect ty
                         (icmp _ (IntCC.UnsignedGreaterThanOrEqual) lhs rhs)
                         rhs lhs))
      (umin ty lhs rhs))

;; For floats, convert select-of-fcmp `lt` into fmin_pseudo and `gt` into
;; fmax_pseudo.
;;
;; The fmax_pseudo docs state:
;;   The behaviour for this operation is defined as
;;   fmax_pseudo(a, b) = (a < b) ? b : a, and the behaviour for zero or NaN
;;   inputs follows from the behaviour of < with such inputs.
;; and fmin_pseudo is documented as fmin_pseudo(a, b) = (b < a) ? b : a.
;;
;; Operand order matters, because these operations are not commutative for
;; NaN or signed-zero inputs. Spelling the matched patterns out:
;;
;;   select(x < y, x, y) = (x < y) ? x : y = fmin_pseudo(y, x)
;;   select(x > y, x, y) = (y < x) ? x : y = fmax_pseudo(y, x)
;;
;; Emitting `(fmin_pseudo ty x y)` / `(fmax_pseudo ty x y)` instead would be a
;; miscompile: with x = NaN the select returns y (the comparison is false),
;; while f{min,max}_pseudo(x, y) returns x, i.e. NaN. The same happens for
;; x = +0.0, y = -0.0. Hence the operands are swapped on the right-hand side.
(rule (simplify
       (select ty (fcmp _ (FloatCC.LessThan) x y) x y))
      (fmin_pseudo ty y x))
(rule (simplify
       (select ty (fcmp _ (FloatCC.GreaterThan) x y) x y))
      (fmax_pseudo ty y x))

;; Do the same for vectors; the operand-order reasoning is identical.
(rule (simplify
       (vselect ty (fcmp _ (FloatCC.LessThan) x y) x y))
      (fmin_pseudo ty y x))
(rule (simplify
       (vselect ty (fcmp _ (FloatCC.GreaterThan) x y) x y))
      (fmax_pseudo ty y x))
2 changes: 1 addition & 1 deletion cranelift/filetests/filetests/egraph/licm.clif
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ block2(v9: i32):
; check: v8 = iadd v2, v3
; check: brif v6, block2, block1(v8)


; check: block2:
; check: v10 = iconst.i32 1
; check: v4 = iadd.i32 v1, v10
; check: return v4

155 changes: 155 additions & 0 deletions cranelift/filetests/filetests/egraph/select.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
; Filetest header for the select-of-compare rewrites.
; The functions in this file are checked with opt_level=speed and
; use_egraphs=true, once for each target listed below.
; NOTE(review): `test optimize` presumably runs the optimizer and matches its
; output against the `; check:` lines after each function — confirm.
test optimize
set opt_level=speed
set use_egraphs=true
target x86_64
target aarch64
target s390x
target riscv64

; Picks v0 when v0 > v1 (signed) and v1 otherwise.
; The checks below expect the icmp+select pair to be replaced by one smax.
function %select_sgt_to_smax(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = icmp sgt v0, v1
v3 = select v2, v0, v1
return v3
}

; check: block0(v0: i32, v1: i32):
; check: v4 = smax v0, v1
; check: return v4


; This tests an inverted select, where the value operands are swapped relative
; to the comparison: v1 is picked when v0 > v1 (signed) and v0 otherwise.
; That is the smaller of the two, so the checks below expect smin, not smax.
function %select_sgt_to_smax_inverse(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = icmp sgt v0, v1
v3 = select v2, v1, v0
return v3
}

; check: block0(v0: i32, v1: i32):
; check: v4 = smin v0, v1
; check: return v4


; Picks v0 when v0 >= v1 (signed) and v1 otherwise.
; The checks below expect the icmp+select pair to be replaced by one smax.
function %select_sge_to_smax(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = icmp sge v0, v1
v3 = select v2, v0, v1
return v3
}

; check: block0(v0: i32, v1: i32):
; check: v4 = smax v0, v1
; check: return v4


; Picks v0 when v0 > v1 (unsigned) and v1 otherwise.
; The checks below expect the icmp+select pair to be replaced by one umax.
function %select_ugt_to_umax(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = icmp ugt v0, v1
v3 = select v2, v0, v1
return v3
}

; check: block0(v0: i32, v1: i32):
; check: v4 = umax v0, v1
; check: return v4


; Picks v0 when v0 >= v1 (unsigned) and v1 otherwise.
; The checks below expect the icmp+select pair to be replaced by one umax.
function %select_uge_to_umax(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = icmp uge v0, v1
v3 = select v2, v0, v1
return v3
}

; check: block0(v0: i32, v1: i32):
; check: v4 = umax v0, v1
; check: return v4



; Picks v0 when v0 < v1 (signed) and v1 otherwise.
; The checks below expect the icmp+select pair to be replaced by one smin.
function %select_slt_to_smin(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = icmp slt v0, v1
v3 = select v2, v0, v1
return v3
}

; check: block0(v0: i32, v1: i32):
; check: v4 = smin v0, v1
; check: return v4


; Picks v0 when v0 <= v1 (signed) and v1 otherwise.
; The checks below expect the icmp+select pair to be replaced by one smin.
function %select_sle_to_smin(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = icmp sle v0, v1
v3 = select v2, v0, v1
return v3
}

; check: block0(v0: i32, v1: i32):
; check: v4 = smin v0, v1
; check: return v4


; Picks v0 when v0 < v1 (unsigned) and v1 otherwise.
; The checks below expect the icmp+select pair to be replaced by one umin.
function %select_ult_to_umin(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = icmp ult v0, v1
v3 = select v2, v0, v1
return v3
}

; check: block0(v0: i32, v1: i32):
; check: v4 = umin v0, v1
; check: return v4


; Picks v0 when v0 <= v1 (unsigned) and v1 otherwise.
; The checks below expect the icmp+select pair to be replaced by one umin.
function %select_ule_to_umin(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = icmp ule v0, v1
v3 = select v2, v0, v1
return v3
}

; check: block0(v0: i32, v1: i32):
; check: v4 = umin v0, v1
; check: return v4



; Negative test: the comparison is on v0/v1 but the select chooses between the
; unrelated values v2/v3, so this is not a min/max.
; The checks below expect the icmp and the select to be left unchanged.
function %select_with_different_regs_does_not_optimize(i32, i32, i32, i32) -> i32 {
block0(v0: i32, v1: i32, v2: i32, v3: i32):
v4 = icmp ule v0, v1
v5 = select v4, v2, v3
return v5
}

; check: block0(v0: i32, v1: i32, v2: i32, v3: i32):
; check: v4 = icmp ule v0, v1
; check: v5 = select v4, v2, v3
; check: return v5




; Picks v0 when v0 > v1 and v1 otherwise, i.e. (v1 < v0) ? v0 : v1.
; fmax_pseudo(a, b) is defined as (a < b) ? b : a, so the equivalent
; instruction is `fmax_pseudo v1, v0`. The operand order is significant:
; if v0 is NaN the select yields v1, whereas `fmax_pseudo v0, v1` would
; yield v0 (NaN).
function %select_fcmp_gt_to_fmax_pseudo(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fcmp gt v0, v1
v3 = select v2, v0, v1
return v3
}

; check: block0(v0: f32, v1: f32):
; check: v4 = fmax_pseudo v1, v0
; check: return v4

; Picks v0 when v0 < v1 and v1 otherwise, i.e. (v0 < v1) ? v0 : v1.
; fmin_pseudo(a, b) is documented as (b < a) ? b : a, so the equivalent
; instruction is `fmin_pseudo v1, v0`. The operand order is significant:
; if v0 is NaN the select yields v1, whereas `fmin_pseudo v0, v1` would
; yield v0 (NaN).
function %select_fcmp_lt_to_fmin_pseudo(f32, f32) -> f32 {
block0(v0: f32, v1: f32):
v2 = fcmp lt v0, v1
v3 = select v2, v0, v1
return v3
}

; check: block0(v0: f32, v1: f32):
; check: v4 = fmin_pseudo v1, v0
; check: return v4
Loading