-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathuvg_analyze.jou
More file actions
272 lines (232 loc) · 10.4 KB
/
Copy pathuvg_analyze.jou
File metadata and controls
272 lines (232 loc) · 10.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
# See doc/compiler_internals/uvg.md
import "stdlib/assert.jou"
import "stdlib/str.jou"
import "stdlib/list.jou"
import "stdlib/mem.jou"
import "./uvg.jou"
import "./types.jou"
import "./errors_and_warnings.jou"
# Values of variable statuses are designed so that statuses coming from
# different branches can be combined together with the bitwise "|" (or)
# operator.
#
# For example, "1 | 2 = 3" means that if a variable has a garbage value in
# branch A (1) and a non-garbage value in branch B (2), and A and B both jump
# into a third branch C, then status in C is "may or may not be garbage" (3).
const UNVISITED: uint8 = 0b000 # Don't know anything about this variable yet.
const UNDEFINED: uint8 = 0b001 # Holds a garbage value.
const DEFINED: uint8 = 0b010 # This variable has been set to some non-garbage value.
const POSSIBLY_UNDEFINED: uint8 = 0b011 # Could hold a garbage or non-garbage value.
const DONT_ANALYZE: uint8 = 0b111 # The "don't analyze" UVG instruction has been used.
if False:
def print_statuses(uvg: Uvg*, statuses: uint8*) -> None:
printf("VAR STATUSES:")
for v = 0; v < uvg.varnames.len; v++:
printf(" %s=", uvg.varnames[v])
match statuses[v]:
case UNVISITED:
printf("{}")
case UNDEFINED:
printf("{u}")
case DEFINED:
printf("{d}")
case POSSIBLY_UNDEFINED:
printf("{u,d}")
case DONT_ANALYZE:
printf("-")
case _:
printf("?")
printf("\n")
def build_statuses_at_end_before_analyzing(uvg: Uvg*) -> uint8**:
# statuses_at_end[b][v] = status of variable v at end of block b
statuses_at_end: uint8** = malloc(sizeof(statuses_at_end[0]) * uvg.blocks.len)
assert statuses_at_end != NULL
for b = 0; b < uvg.blocks.len; b++:
statuses_at_end[b] = malloc(uvg.varnames.len)
if uvg.varnames.len != 0:
assert statuses_at_end[b] != NULL
memset(statuses_at_end[b], UNVISITED, uvg.varnames.len)
return statuses_at_end
def build_statuses_at_start_of_block(uvg: Uvg*, statuses_at_end: uint8**, block: UvgBlock*) -> uint8*:
n = uvg.varnames.len
if n == 0:
return NULL
statuses: uint8* = malloc(n)
assert statuses != NULL
if block == uvg.blocks.ptr[0]:
# The start block, everything is initially undefined
memset(statuses, UNDEFINED, n)
else:
memset(statuses, UNVISITED, n)
# Merge statuses of source blocks that jump to the current block
for b = 0; b < uvg.blocks.len; b++:
if uvg.blocks.ptr[b].jumps_to(block):
# This function is O(n^2) but barely so, the following loop should be really fast in practice
for v = 0; v < n; v++:
statuses[v] |= statuses_at_end[b][v]
return statuses
def handle_missing_return_statement(uvg: Uvg*, use_of_the_special_return_variable: UvgInstruction*) -> None:
# Check if there is a "return" statement that user wrote. Because this is
# only for functions that are supposed to return a value, such a return
# statement always sets the return value, even if it's an inlined function.
n = 0
for b = 0; b < uvg.blocks.len; b++:
for b_ins = uvg.blocks.ptr[b].instructions.ptr; b_ins < uvg.blocks.ptr[b].instructions.end(); b_ins++:
if (
b_ins.kind == UvgInstructionKind.Set
and b_ins.var == use_of_the_special_return_variable.var
):
n++
if use_of_the_special_return_variable.inlining_signature == NULL:
sig = uvg.signature
prefix = ""
else:
sig = use_of_the_special_return_variable.inlining_signature
prefix = "inlined "
msg: byte[500]
if n == 0:
# There are no return statements. Tell user to add them and why they are needed.
snprintf(
msg,
sizeof(msg),
"%s%s '%s' must return a value, because it is defined with '-> %s'",
prefix,
sig.function_or_method(),
sig.name,
sig.return_type.name,
)
fail(use_of_the_special_return_variable.location, msg)
else:
# There are some return statements but they don't cover all cases.
snprintf(
msg,
sizeof(msg),
"%s%s '%s' doesn't seem to return a value in all cases",
prefix,
sig.function_or_method(),
sig.name,
)
show_warning(use_of_the_special_return_variable.location, msg)
def show_undefined_variable_warning(ins: UvgInstruction*, varname: byte*, sure: bool) -> None:
if sure:
what_we_say = "is undefined"
else:
what_we_say = "may be undefined"
msg: byte[500]
if ins.inlining_signature == NULL:
snprintf(msg, sizeof(msg), "the value of '%s' %s", varname, what_we_say)
else:
snprintf(
msg,
sizeof(msg),
"the value of '%s' inside inlined %s '%s' %s",
varname,
ins.inlining_signature.function_or_method(),
ins.inlining_signature.name,
what_we_say,
)
show_warning(ins.location, msg)
def update_statuses_based_on_instructions(uvg: Uvg*, statuses: uint8*, block: UvgBlock*, warn: bool) -> None:
for ins = block.instructions.ptr; ins < block.instructions.end(); ins++:
match ins.kind:
case UvgInstructionKind.Set:
if statuses[ins.var] != DONT_ANALYZE:
statuses[ins.var] = DEFINED
case UvgInstructionKind.Use:
if warn and strcmp(uvg.varnames.ptr[ins.var], "$anonymous") != 0:
match statuses[ins.var]:
case UNDEFINED:
if strcmp(uvg.varnames.ptr[ins.var], "return") == 0:
handle_missing_return_statement(uvg, ins)
else:
show_undefined_variable_warning(ins, uvg.varnames.ptr[ins.var], True)
case POSSIBLY_UNDEFINED:
if strcmp(uvg.varnames.ptr[ins.var], "return") == 0:
handle_missing_return_statement(uvg, ins)
else:
show_undefined_variable_warning(ins, uvg.varnames.ptr[ins.var], False)
case UvgInstructionKind.Statement:
pass
case UvgInstructionKind.DontAnalyze:
statuses[ins.var] = DONT_ANALYZE
def analyze_block(uvg: Uvg*, statuses_at_end: uint8**, blockidx: int, warn: bool) -> bool:
statuses = build_statuses_at_start_of_block(uvg, statuses_at_end, uvg.blocks.ptr[blockidx])
update_statuses_based_on_instructions(uvg, statuses, uvg.blocks.ptr[blockidx], warn)
if memcmp(statuses, statuses_at_end[blockidx], sizeof(statuses[0]) * uvg.varnames.len) != 0:
# Statuses changed
assert not warn # warnings should be applied at the end when statuses are stable
free(statuses_at_end[blockidx])
statuses_at_end[blockidx] = statuses
return True
else:
free(statuses)
return False
def find_true(arr: bool*, len: int64) -> int:
for i = 0; i < len; i++:
if arr[i]:
return i
return -1
def show_block_unreachable_warning(b: UvgBlock*) -> bool:
for ins = b.instructions.ptr; ins < b.instructions.end(); ins++:
if ins.kind == UvgInstructionKind.Statement:
# TODO: Make this aware of inlining!
show_warning(ins.location, "this code will never run")
return True
return False
# This function sets marked[i] to True whenever blocks[i] is reachable from blocks[start].
# Block A is reachable from block B, if B may jump (perhaps through other blocks) into A.
#
# If bidirectional is True, either block may be the one that jumps into the other.
# This is used to show only one unreachable warning when an unreachable part of code
# consists of many blocks.
#
# If ignore is not NULL and ignore[i] is True, we pretend that blocks[i] doesn't exist.
def mark_reachable_blocks(blocks: List[UvgBlock*], ignore: bool*, marked: bool*, bidirectional: bool, start: int) -> None:
assert 0 <= start and start < blocks.len
assert ignore == NULL or not ignore[start]
assert not marked[start]
marked[start] = True
for i = 0; i < blocks.len; i++:
if (
(ignore == NULL or not ignore[i])
and not marked[i]
and (blocks.ptr[start].jumps_to(blocks.ptr[i]) or (bidirectional and blocks.ptr[i].jumps_to(blocks.ptr[start])))
):
mark_reachable_blocks(blocks, ignore, marked, bidirectional, i)
@public
def uvg_analyze(uvg: Uvg*) -> None:
assert uvg.blocks.len >= 1 # must have at least start block
queue: bool* = calloc(sizeof(queue[0]), uvg.blocks.len)
reachable: bool* = calloc(sizeof(reachable[0]), uvg.blocks.len)
u_warned: bool* = calloc(sizeof(u_warned[0]), uvg.blocks.len)
assert queue != NULL
assert reachable != NULL
assert u_warned != NULL
mark_reachable_blocks(uvg.blocks, NULL, reachable, False, 0)
# Analyze blocks until we arrive at a stable state where variable statuses don't change.
statuses_at_end = build_statuses_at_end_before_analyzing(uvg)
queue[0] = True
while True:
blockidx = find_true(queue, uvg.blocks.len)
if blockidx == -1:
break
queue[blockidx] = False
if analyze_block(uvg, statuses_at_end, blockidx, False):
for b = 0; b < uvg.blocks.len; b++:
if uvg.blocks.ptr[blockidx].jumps_to(uvg.blocks.ptr[b]):
queue[b] = True
for b = 0; b < uvg.blocks.len; b++:
if reachable[b]:
analyze_block(uvg, statuses_at_end, b, True)
elif not u_warned[b]:
# Attempt to show unreachable warning. Will fail if the block is
# basically empty, then its unreachability doesn't matter.
if show_block_unreachable_warning(uvg.blocks.ptr[b]):
# Warning was shown. Do not show it for related unreachable blocks.
mark_reachable_blocks(uvg.blocks, reachable, u_warned, True, b)
free(queue)
free(reachable)
free(u_warned)
for b = 0; b < uvg.blocks.len; b++:
free(statuses_at_end[b])
free(statuses_at_end)