Skip to content

Commit 76539ef

Browse files
authored
cranelift: Optimize select+icmp into {s,u}{min,max} (#5546)
* cranelift: Optimize `select+icmp` into `{s,u}{min,max}` * cranelift: Add generic egraph icmp reverse rule * cranelift: Optimize `vselect+icmp` into `{s,u}{min,max}` * cranelift: Optimize some `vselect+fcmp` into `f{min,max}_pseudo` * cranelift: Add inverted forms of min/max rules
1 parent f0137c2 commit 76539ef

File tree

4 files changed

+438
-1
lines changed

4 files changed

+438
-1
lines changed

cranelift/codegen/src/opts/algebraic.isle

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,3 +345,131 @@
345345
(uextend $I64 x @ (value_type $I32))
346346
(iconst _ (u64_from_imm64 0))))
347347
(iconst ty (imm64 1)))
348+
349+
350+
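;; Transform select-of-icmp into {u,s}{min,max} instructions where possible.
;;
;; The strict and non-strict forms of each comparison map to the same instruction:
;; they only disagree when `x == y`, and in that case both operands are the same value.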
351+
(rule (simplify
352+
(select ty (icmp _ (IntCC.SignedGreaterThan) x y) x y))
353+
(smax ty x y))
354+
(rule (simplify
355+
(select ty (icmp _ (IntCC.SignedGreaterThanOrEqual) x y) x y))
356+
(smax ty x y))
357+
(rule (simplify
358+
(select ty (icmp _ (IntCC.UnsignedGreaterThan) x y) x y))
359+
(umax ty x y))
360+
(rule (simplify
361+
(select ty (icmp _ (IntCC.UnsignedGreaterThanOrEqual) x y) x y))
362+
(umax ty x y))
363+
(rule (simplify
364+
(select ty (icmp _ (IntCC.SignedLessThan) x y) x y))
365+
(smin ty x y))
366+
(rule (simplify
367+
(select ty (icmp _ (IntCC.SignedLessThanOrEqual) x y) x y))
368+
(smin ty x y))
369+
(rule (simplify
370+
(select ty (icmp _ (IntCC.UnsignedLessThan) x y) x y))
371+
(umin ty x y))
372+
(rule (simplify
373+
(select ty (icmp _ (IntCC.UnsignedLessThanOrEqual) x y) x y))
374+
(umin ty x y))
375+
376+
377+
;; These are the same rules as above, but with the last two operands of `select` swapped:
;; `select (x < y), y, x` is a max, and `select (x > y), y, x` is a min.
378+
(rule (simplify
379+
(select ty (icmp _ (IntCC.SignedLessThan) x y) y x))
380+
(smax ty x y))
381+
(rule (simplify
382+
(select ty (icmp _ (IntCC.SignedLessThanOrEqual) x y) y x))
383+
(smax ty x y))
384+
(rule (simplify
385+
(select ty (icmp _ (IntCC.UnsignedLessThan) x y) y x))
386+
(umax ty x y))
387+
(rule (simplify
388+
(select ty (icmp _ (IntCC.UnsignedLessThanOrEqual) x y) y x))
389+
(umax ty x y))
390+
(rule (simplify
391+
(select ty (icmp _ (IntCC.SignedGreaterThan) x y) y x))
392+
(smin ty x y))
393+
(rule (simplify
394+
(select ty (icmp _ (IntCC.SignedGreaterThanOrEqual) x y) y x))
395+
(smin ty x y))
396+
(rule (simplify
397+
(select ty (icmp _ (IntCC.UnsignedGreaterThan) x y) y x))
398+
(umin ty x y))
399+
(rule (simplify
400+
(select ty (icmp _ (IntCC.UnsignedGreaterThanOrEqual) x y) y x))
401+
(umin ty x y))
402+
403+
;; Transform vselect-of-icmp into {u,s}{min,max} instructions where possible.
404+
(rule (simplify
405+
(vselect ty (icmp _ (IntCC.SignedGreaterThan) x y) x y))
406+
(smax ty x y))
407+
(rule (simplify
408+
(vselect ty (icmp _ (IntCC.SignedGreaterThanOrEqual) x y) x y))
409+
(smax ty x y))
410+
(rule (simplify
411+
(vselect ty (icmp _ (IntCC.UnsignedGreaterThan) x y) x y))
412+
(umax ty x y))
413+
(rule (simplify
414+
(vselect ty (icmp _ (IntCC.UnsignedGreaterThanOrEqual) x y) x y))
415+
(umax ty x y))
416+
(rule (simplify
417+
(vselect ty (icmp _ (IntCC.SignedLessThan) x y) x y))
418+
(smin ty x y))
419+
(rule (simplify
420+
(vselect ty (icmp _ (IntCC.SignedLessThanOrEqual) x y) x y))
421+
(smin ty x y))
422+
(rule (simplify
423+
(vselect ty (icmp _ (IntCC.UnsignedLessThan) x y) x y))
424+
(umin ty x y))
425+
(rule (simplify
426+
(vselect ty (icmp _ (IntCC.UnsignedLessThanOrEqual) x y) x y))
427+
(umin ty x y))
428+
429+
;; These are the same rules as above, but with the last two operands of `vselect` swapped:
;; `vselect (x < y), y, x` is a max, and `vselect (x > y), y, x` is a min.
430+
(rule (simplify
431+
(vselect ty (icmp _ (IntCC.SignedLessThan) x y) y x))
432+
(smax ty x y))
433+
(rule (simplify
434+
(vselect ty (icmp _ (IntCC.SignedLessThanOrEqual) x y) y x))
435+
(smax ty x y))
436+
(rule (simplify
437+
(vselect ty (icmp _ (IntCC.UnsignedLessThan) x y) y x))
438+
(umax ty x y))
439+
(rule (simplify
440+
(vselect ty (icmp _ (IntCC.UnsignedLessThanOrEqual) x y) y x))
441+
(umax ty x y))
442+
(rule (simplify
443+
(vselect ty (icmp _ (IntCC.SignedGreaterThan) x y) y x))
444+
(smin ty x y))
445+
(rule (simplify
446+
(vselect ty (icmp _ (IntCC.SignedGreaterThanOrEqual) x y) y x))
447+
(smin ty x y))
448+
(rule (simplify
449+
(vselect ty (icmp _ (IntCC.UnsignedGreaterThan) x y) y x))
450+
(umin ty x y))
451+
(rule (simplify
452+
(vselect ty (icmp _ (IntCC.UnsignedGreaterThanOrEqual) x y) y x))
453+
(umin ty x y))
454+
455+
;; For floats convert fcmp lt into pseudo_min and gt into pseudo_max
456+
;;
457+
;; The pseudo min/max docs state:
458+
;; fmin_pseudo(a, b) = (b < a) ? b : a and fmax_pseudo(a, b) = (a < b) ? b : a, and the behaviour for zero
459+
;; or NaN inputs follows from the behaviour of < with such inputs.
460+
;;
461+
;; The patterns matched here compute (x < y) ? x : y, which is fmin_pseudo(y, x), and
;; (y < x) ? x : y, which is fmax_pseudo(y, x). The operands of the result must therefore be
;; swapped relative to the comparison: these operations are not commutative for NaN or
;; signed-zero inputs (e.g. with x = NaN, y = 1.0 the select yields 1.0, whereas
;; fmin_pseudo(x, y) would yield NaN).
462+
(rule (simplify
463+
(select ty (fcmp _ (FloatCC.LessThan) x y) x y))
464+
(fmin_pseudo ty y x))
465+
(rule (simplify
466+
(select ty (fcmp _ (FloatCC.GreaterThan) x y) x y))
467+
(fmax_pseudo ty y x))
468+
469+
;; Do the same for vectors. `(x < y) ? x : y` is fmin_pseudo(y, x) and `(y < x) ? x : y` is
;; fmax_pseudo(y, x), where fmin_pseudo(a, b) = (b < a) ? b : a and
;; fmax_pseudo(a, b) = (a < b) ? b : a. The operand order matters for NaN and signed-zero lanes,
;; so the result operands are swapped relative to the comparison.
470+
(rule (simplify
471+
(vselect ty (fcmp _ (FloatCC.LessThan) x y) x y))
472+
(fmin_pseudo ty y x))
473+
(rule (simplify
474+
(vselect ty (fcmp _ (FloatCC.GreaterThan) x y) x y))
475+
(fmax_pseudo ty y x))

cranelift/filetests/filetests/egraph/licm.clif

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ block2(v9: i32):
3131
; check: v8 = iadd v2, v3
3232
; check: brif v6, block2, block1(v8)
3333

34+
3435
; check: block2:
3536
; check: v10 = iconst.i32 1
3637
; check: v4 = iadd.i32 v1, v10
3738
; check: return v4
38-
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
test optimize
2+
set opt_level=speed
3+
set use_egraphs=true
4+
target x86_64
5+
target aarch64
6+
target s390x
7+
target riscv64
8+
9+
function %select_sgt_to_smax(i32, i32) -> i32 {
10+
block0(v0: i32, v1: i32):
11+
v2 = icmp sgt v0, v1
12+
v3 = select v2, v0, v1
13+
return v3
14+
}
15+
16+
; check: block0(v0: i32, v1: i32):
17+
; check: v4 = smax v0, v1
18+
; check: return v4
19+
20+
21+
; This tests an inverted select, where the operands are swapped.
22+
function %select_sgt_to_smax_inverse(i32, i32) -> i32 {
23+
block0(v0: i32, v1: i32):
24+
v2 = icmp sgt v0, v1
25+
v3 = select v2, v1, v0
26+
return v3
27+
}
28+
29+
; check: block0(v0: i32, v1: i32):
30+
; check: v4 = smin v0, v1
31+
; check: return v4
32+
33+
34+
function %select_sge_to_smax(i32, i32) -> i32 {
35+
block0(v0: i32, v1: i32):
36+
v2 = icmp sge v0, v1
37+
v3 = select v2, v0, v1
38+
return v3
39+
}
40+
41+
; check: block0(v0: i32, v1: i32):
42+
; check: v4 = smax v0, v1
43+
; check: return v4
44+
45+
46+
function %select_ugt_to_umax(i32, i32) -> i32 {
47+
block0(v0: i32, v1: i32):
48+
v2 = icmp ugt v0, v1
49+
v3 = select v2, v0, v1
50+
return v3
51+
}
52+
53+
; check: block0(v0: i32, v1: i32):
54+
; check: v4 = umax v0, v1
55+
; check: return v4
56+
57+
58+
function %select_uge_to_umax(i32, i32) -> i32 {
59+
block0(v0: i32, v1: i32):
60+
v2 = icmp uge v0, v1
61+
v3 = select v2, v0, v1
62+
return v3
63+
}
64+
65+
; check: block0(v0: i32, v1: i32):
66+
; check: v4 = umax v0, v1
67+
; check: return v4
68+
69+
70+
71+
function %select_slt_to_smin(i32, i32) -> i32 {
72+
block0(v0: i32, v1: i32):
73+
v2 = icmp slt v0, v1
74+
v3 = select v2, v0, v1
75+
return v3
76+
}
77+
78+
; check: block0(v0: i32, v1: i32):
79+
; check: v4 = smin v0, v1
80+
; check: return v4
81+
82+
83+
function %select_sle_to_smin(i32, i32) -> i32 {
84+
block0(v0: i32, v1: i32):
85+
v2 = icmp sle v0, v1
86+
v3 = select v2, v0, v1
87+
return v3
88+
}
89+
90+
; check: block0(v0: i32, v1: i32):
91+
; check: v4 = smin v0, v1
92+
; check: return v4
93+
94+
95+
function %select_ult_to_umin(i32, i32) -> i32 {
96+
block0(v0: i32, v1: i32):
97+
v2 = icmp ult v0, v1
98+
v3 = select v2, v0, v1
99+
return v3
100+
}
101+
102+
; check: block0(v0: i32, v1: i32):
103+
; check: v4 = umin v0, v1
104+
; check: return v4
105+
106+
107+
function %select_ule_to_umin(i32, i32) -> i32 {
108+
block0(v0: i32, v1: i32):
109+
v2 = icmp ule v0, v1
110+
v3 = select v2, v0, v1
111+
return v3
112+
}
113+
114+
; check: block0(v0: i32, v1: i32):
115+
; check: v4 = umin v0, v1
116+
; check: return v4
117+
118+
119+
120+
function %select_with_different_regs_does_not_optimize(i32, i32, i32, i32) -> i32 {
121+
block0(v0: i32, v1: i32, v2: i32, v3: i32):
122+
v4 = icmp ule v0, v1
123+
v5 = select v4, v2, v3
124+
return v5
125+
}
126+
127+
; check: block0(v0: i32, v1: i32, v2: i32, v3: i32):
128+
; check: v4 = icmp ule v0, v1
129+
; check: v5 = select v4, v2, v3
130+
; check: return v5
131+
132+
133+
134+
135+
; `select (v0 > v1), v0, v1` is `(v1 < v0) ? v0 : v1`. With
; fmax_pseudo(a, b) = (a < b) ? b : a this is fmax_pseudo(v1, v0), so the
; operands of the expected instruction are swapped relative to the compare.
function %select_fcmp_gt_to_fmax_pseudo(f32, f32) -> f32 {
136+
block0(v0: f32, v1: f32):
137+
v2 = fcmp gt v0, v1
138+
v3 = select v2, v0, v1
139+
return v3
140+
}
141+
142+
; check: block0(v0: f32, v1: f32):
143+
; check: v4 = fmax_pseudo v1, v0
144+
; check: return v4
145+
146+
; `select (v0 < v1), v0, v1` is `(v0 < v1) ? v0 : v1`. With
; fmin_pseudo(a, b) = (b < a) ? b : a this is fmin_pseudo(v1, v0), so the
; operands of the expected instruction are swapped relative to the compare.
function %select_fcmp_lt_to_fmin_pseudo(f32, f32) -> f32 {
147+
block0(v0: f32, v1: f32):
148+
v2 = fcmp lt v0, v1
149+
v3 = select v2, v0, v1
150+
return v3
151+
}
152+
153+
; check: block0(v0: f32, v1: f32):
154+
; check: v4 = fmin_pseudo v1, v0
155+
; check: return v4

0 commit comments

Comments
 (0)