Skip to content

Commit c34b215

Browse files
vbe-sckv-sc
authored and committed
[snippy] implement random scheduling deoptimization
This patch implements deoptimization of the generated snippet by reordering instructions. It is guaranteed that the final state of the register file and memory remains the same. The algorithm involves the following steps: 1) A dependency graph of instructions is built based on RAW, WAR, and WAW dependencies. 2) An arbitrary reverse post order (RPO) traversal of this graph is selected. 3) The code is reordered according to the selected traversal order. In practice, multiple different graphs must be constructed to preserve control flow as well as the observed behavior regarding instructions with non-trivial semantics (e.g., fence).
1 parent 533c956 commit c34b215

24 files changed

Lines changed: 1243 additions & 0 deletions
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
scheduling:
2+
enabled: true
3+
max-region-size: 1000
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
# RUN: llvm-snippy %s -model-plugin=None -verify-mi |& FileCheck %s
2+
3+
options:
4+
mtriple: riscv64
5+
dump-mf: on
6+
mattr: "+c"
7+
num-instrs: 10000
8+
verify-gen-histogram: on
9+
histogram-must-succeed: on
10+
11+
scheduling:
12+
enabled: on
13+
max-region-size: 250
14+
15+
include:
16+
- Inputs/sections.yaml
17+
18+
histogram-patterns:
19+
- LdSd: "AND * (LD ^ 10) * SUB * (SD ^ 5)"
20+
- C_LdSd: "(C_LW ^ 5) * C_LD | (C_SW ^ 3) * C_SD"
21+
22+
histogram:
23+
- [pattern: LdSd, 11]
24+
- [pattern: C_LdSd, 11]
25+
- [LB, 11]
26+
- [LBU, 11]
27+
- [LH, 11]
28+
- [LHU, 11]
29+
- [LW, 11]
30+
- [LWU, 11]
31+
- [SB, 16]
32+
- [SH, 16]
33+
- [PseudoNOP, 5]
34+
35+
# CHECK-DAG: LB
36+
# CHECK-DAG: LBU
37+
# CHECK-DAG: LH
38+
# CHECK-DAG: LHU
39+
# CHECK-DAG: LW
40+
# CHECK-DAG: SB
41+
# CHECK-DAG: SH
42+
# CHECK-DAG: PseudoNOP
43+
44+
# COM: Check LdSd pattern
45+
# CHECK: AND
46+
# CHECK-NEXT: LD
47+
# CHECK-NEXT: LD
48+
# CHECK-NEXT: LD
49+
# CHECK-NEXT: LD
50+
# CHECK-NEXT: LD
51+
# CHECK-NEXT: LD
52+
# CHECK-NEXT: LD
53+
# CHECK-NEXT: LD
54+
# CHECK-NEXT: LD
55+
# CHECK-NEXT: LD
56+
# CHECK-NEXT: SUB
57+
# CHECK-NEXT: SD
58+
# CHECK-NEXT: SD
59+
# CHECK-NEXT: SD
60+
# CHECK-NEXT: SD
61+
# CHECK-NEXT: SD
62+
63+
# COM: Check LdSd pattern
64+
# CHECK: AND
65+
# CHECK-NEXT: LD
66+
# CHECK-NEXT: LD
67+
# CHECK-NEXT: LD
68+
# CHECK-NEXT: LD
69+
# CHECK-NEXT: LD
70+
# CHECK-NEXT: LD
71+
# CHECK-NEXT: LD
72+
# CHECK-NEXT: LD
73+
# CHECK-NEXT: LD
74+
# CHECK-NEXT: LD
75+
# CHECK-NEXT: SUB
76+
# CHECK-NEXT: SD
77+
# CHECK-NEXT: SD
78+
# CHECK-NEXT: SD
79+
# CHECK-NEXT: SD
80+
# CHECK-NEXT: SD
81+
82+
# COM: Check C_LdSd pattern
83+
# CHECK: C_LW
84+
# CHECK-NEXT: C_LW
85+
# CHECK-NEXT: C_LW
86+
# CHECK-NEXT: C_LW
87+
# CHECK-NEXT: C_LW
88+
# CHECK-NEXT: C_LD
89+
90+
# COM: Check C_LdSd pattern
91+
# CHECK: C_SW
92+
# CHECK-NEXT: C_SW
93+
# CHECK-NEXT: C_SW
94+
# CHECK-NEXT: C_SD
95+
96+
# COM: Check C_LdSd pattern
97+
# CHECK: C_LW
98+
# CHECK-NEXT: C_LW
99+
# CHECK-NEXT: C_LW
100+
# CHECK-NEXT: C_LW
101+
# CHECK-NEXT: C_LW
102+
# CHECK-NEXT: C_LD
103+
104+
# COM: Check C_LdSd pattern
105+
# CHECK: C_SW
106+
# CHECK-NEXT: C_SW
107+
# CHECK-NEXT: C_SW
108+
# CHECK-NEXT: C_SD
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# REQUIRES: asserts
2+
# RUN: llvm-snippy %s \
3+
# RUN: -mtriple=riscv64 -model-plugin=None \
4+
# RUN: -num-instrs=10000 -init-regs-in-elf -debug-only=snippy-riscv \
5+
# RUN: -initial-regs=%S/initialization/Inputs/initial_state_rv64i.yaml \
6+
# RUN: -random-scheduling \
7+
# RUN: |& FileCheck %s
8+
9+
options:
10+
mattr: +zifencei
11+
sections:
12+
- name: 0
13+
VMA: 0x10000
14+
SIZE: 0x40000
15+
LMA: 0x10000
16+
ACCESS: r
17+
- name: 1
18+
VMA: 0x100000
19+
SIZE: 0x100000
20+
LMA: 0x100000
21+
ACCESS: rx
22+
- name: 2
23+
VMA: 0x210000
24+
SIZE: 0x100000
25+
LMA: 0x210000
26+
ACCESS: rw
27+
28+
histogram:
29+
# branches
30+
- [BEQ, 3.7]
31+
- [BGE, 3.7]
32+
- [BGEU, 3.7]
33+
- [BLT, 3.7]
34+
- [BLTU, 3.7]
35+
- [BNE, 3.7]
36+
- [ECALL, 10.0]
37+
- [WFI, 10.0]
38+
- [FENCE, 10.0]
39+
- [FENCE_I, 10.0]
40+
- [SFENCE_VMA, 10.0]
41+
- [UNIMP, 10.0]
42+
- [AUIPC, 10.0]
43+
- [SW, 1.0]
44+
- [ADD, 1.0]
45+
- [XOR, 1.0]
46+
- [AND, 1.0]
47+
- [SUB, 1.0]
48+
branches:
49+
number-of-loop-iterations:
50+
min: 1
51+
max: 2
52+
max-depth:
53+
if: 3
54+
loop: 3
55+
56+
# CHECK-DAG: Unsupported scheduling instruction : {{((\$x([0-9]+)))}} = AUIPC
57+
# CHECK-DAG: Unsupported scheduling instruction : ECALL
58+
# CHECK-DAG: Unsupported scheduling instruction : WFI
59+
# CHECK-DAG: Unsupported scheduling instruction : FENCE
60+
# CHECK-DAG: Unsupported scheduling instruction : FENCE_I
61+
# CHECK-DAG: Unsupported scheduling instruction : SFENCE_VMA
62+
# CHECK-DAG: Unsupported scheduling instruction : UNIMP
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# RUN: llvm-snippy %s | FileCheck %s
2+
3+
options:
4+
mtriple: riscv64
5+
mattr: "+m,+v"
6+
num-instrs: 10000
7+
dump-mf: on
8+
verify-gen-histogram: on
9+
histogram-must-succeed: on
10+
11+
include:
12+
- Inputs/sections.yaml
13+
14+
scheduling:
15+
enabled: on
16+
17+
burst:
18+
mode: custom
19+
min-size: 5
20+
max-size: 5
21+
groupings:
22+
- [LD]
23+
- [SD]
24+
- [LW]
25+
- [SW]
26+
- [VLE8_V]
27+
- [VADD_VX]
28+
- [VLSEG2E32_V]
29+
- [VL2RE8_V]
30+
- [VS2R_V]
31+
32+
histogram:
33+
- [LD, 1.0]
34+
- [SD, 1.0]
35+
- [LW, 1.0]
36+
- [SW, 1.0]
37+
- [VSETVL, 0.5]
38+
- [VLE8_V, 1.0]
39+
- [VADD_VX, 1.0]
40+
- [VLSEG2E32_V, 1.0]
41+
- [VL2RE8_V, 1.0]
42+
- [VS2R_V, 1.0]
43+
- [MUL, 1.0]
44+
- [DIV, 1.0]
45+
- [SUB, 1.0]
46+
47+
# CHECK-DAG: MUL
48+
# CHECK-DAG: DIV
49+
# CHECK-DAG: SUB
50+
51+
# CHECK: LD
52+
# CHECK-NEXT: LD
53+
# CHECK-NEXT: LD
54+
# CHECK-NEXT: LD
55+
# CHECK-NEXT: LD
56+
57+
# CHECK: SD
58+
# CHECK-NEXT: SD
59+
# CHECK-NEXT: SD
60+
# CHECK-NEXT: SD
61+
# CHECK-NEXT: SD
62+
63+
# CHECK: LW
64+
# CHECK-NEXT: LW
65+
# CHECK-NEXT: LW
66+
# CHECK-NEXT: LW
67+
# CHECK-NEXT: LW
68+
69+
# CHECK: SW
70+
# CHECK-NEXT: SW
71+
# CHECK-NEXT: SW
72+
# CHECK-NEXT: SW
73+
# CHECK-NEXT: SW
74+
75+
# CHECK: VLE8_V
76+
# CHECK-NEXT: VLE8_V
77+
# CHECK-NEXT: VLE8_V
78+
# CHECK-NEXT: VLE8_V
79+
# CHECK-NEXT: VLE8_V
80+
81+
# CHECK: VADD_V
82+
# CHECK-NEXT: VADD_V
83+
# CHECK-NEXT: VADD_V
84+
# CHECK-NEXT: VADD_V
85+
# CHECK-NEXT: VADD_V
86+
87+
# CHECK: VLSEG2E32_V
88+
# CHECK-NEXT: VLSEG2E32_V
89+
# CHECK-NEXT: VLSEG2E32_V
90+
# CHECK-NEXT: VLSEG2E32_V
91+
# CHECK-NEXT: VLSEG2E32_V
92+
93+
# CHECK: VL2RE8_V
94+
# CHECK-NEXT: VL2RE8_V
95+
# CHECK-NEXT: VL2RE8_V
96+
# CHECK-NEXT: VL2RE8_V
97+
# CHECK-NEXT: VL2RE8_V
98+
99+
# CHECK: VS2R_V
100+
# CHECK-NEXT: VS2R_V
101+
# CHECK-NEXT: VS2R_V
102+
# CHECK-NEXT: VS2R_V
103+
# CHECK-NEXT: VS2R_V
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# REQUIRES: asserts
2+
# RUN: llvm-snippy %S/Inputs/jal-layout.yaml %s \
3+
# RUN: -mtriple=riscv64 -enable-static-stack=false \
4+
# RUN: -num-instrs=10000 -mattr=+c \
5+
# RUN: -initial-regs=%S/initialization/Inputs/initial_state_rv64i.yaml \
6+
# RUN: -o %t_1
7+
8+
# RUN: llvm-snippy %S/Inputs/jal-layout.yaml %s \
9+
# RUN: -mtriple=riscv64 -enable-static-stack=false \
10+
# RUN: -num-instrs=10000 -mattr=+c -debug-only=snippy-riscv \
11+
# RUN: -initial-regs=%S/initialization/Inputs/initial_state_rv64i.yaml \
12+
# RUN: -o %t_2 -random-scheduling \
13+
# RUN: |& FileCheck %s
14+
15+
# RUN: not diff -q %t_1.elf %t_2.elf
16+
call-graph:
17+
entry-point: SnippyFunction
18+
function-list:
19+
- name: SnippyFunction
20+
callees:
21+
- fun1
22+
- fun2
23+
- name: fun1
24+
callees:
25+
- fun2
26+
- name: fun2
27+
callees:
28+
- fun3
29+
- name: fun3
30+
31+
# CHECK: Currently unsupported scheduling instruction : ${{[a-z0-9]+}} = PseudoJALCall
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# RUN: llvm-snippy %s \
2+
# RUN: -mtriple=riscv64 \
3+
# RUN: -num-instrs=10000 -init-regs-in-elf -o %t_1 \
4+
# RUN: -mattr=+c,+f \
5+
# RUN: -initial-regs=%S/initialization/Inputs/initial_state_rv64i.yaml
6+
7+
# RUN: llvm-snippy %s \
8+
# RUN: -mtriple=riscv64 \
9+
# RUN: -num-instrs=10000 -init-regs-in-elf -o %t_2 \
10+
# RUN: -mattr=+c,+f -random-scheduling \
11+
# RUN: -initial-regs=%S/initialization/Inputs/initial_state_rv64i.yaml
12+
13+
# RUN: not diff -q %t_1.elf %t_2.elf
14+
15+
sections:
16+
- name: 1
17+
VMA: 0x100000
18+
SIZE: 0x1000000
19+
LMA: 0x100000
20+
ACCESS: rx
21+
- name: 2
22+
VMA: 0x1100000
23+
SIZE: 0x100000
24+
LMA: 0x1100000
25+
ACCESS: rw
26+
27+
histogram:
28+
# branches ~ every 30 instructions
29+
- [BEQ, 3.7]
30+
- [BGE, 3.7]
31+
- [BGEU, 3.7]
32+
- [BLT, 3.7]
33+
- [BLTU, 3.7]
34+
- [BNE, 3.7]
35+
- [C_BEQZ, 3.7]
36+
- [C_BNEZ, 3.7]
37+
- [C_J, 3.7]
38+
# Floats
39+
- [FMADD_S, 1.0]
40+
- [FMSUB_S, 1.0]
41+
- [FNMSUB_S, 1.0]
42+
- [FNMADD_S, 1.0]
43+
- [FADD_S, 1.0]
44+
- [FSUB_S, 1.0]
45+
- [FMUL_S, 1.0]
46+
- [FDIV_S, 1.0]
47+
- [FSQRT_S, 1.0]
48+
- [FSGNJ_S, 1.0]
49+
- [FSGNJN_S, 1.0]
50+
- [FSGNJX_S, 1.0]
51+
- [FMIN_S, 1.0]
52+
- [FMAX_S, 1.0]
53+
- [FCVT_W_S, 1.0]
54+
- [FCVT_WU_S, 1.0]
55+
- [FMV_X_W, 1.0]
56+
- [FEQ_S, 1.0]
57+
- [FLT_S, 1.0]
58+
- [FLE_S, 1.0]
59+
- [FCLASS_S, 1.0]
60+
- [FCVT_S_W, 1.0]
61+
- [FCVT_S_WU, 1.0]
62+
- [FMV_W_X, 1.0]
63+
- [FCVT_L_S, 1.0]
64+
- [FCVT_LU_S, 1.0]
65+
- [FCVT_S_L, 1.0]
66+
- [FCVT_S_LU, 1.0]
67+
- [FLW, 1.0]
68+
- [FSW, 1.0]
69+
- [SW, 5.0]
70+
- [ADD, 5.0]
71+
- [XOR, 5.0]
72+
- [AND, 5.0]
73+
- [SUB, 5.0]
74+
branches:
75+
number-of-loop-iterations:
76+
min: 1
77+
max: 2
78+
max-depth:
79+
if: 3
80+
loop: 3
81+

0 commit comments

Comments
 (0)