/*=======================================================================================*/
/*  This Sail RISC-V architecture model, comprising all files and                       */
/*  directories except where otherwise noted is subject to the BSD                      */
/*  two-clause license in the LICENSE file.                                             */
/*                                                                                       */
/*  SPDX-License-Identifier: BSD-2-Clause                                                */
/*=======================================================================================*/
/* ******************************************************************************* */
/* This file implements functions used by vector instructions. */
/* ******************************************************************************* */
/* NFIELDS Encoding
 * Typically, nf is in the range 1 to 8, but for 'Vector Load/Store Whole Register
 * Instructions', only NFIELDS values of 1, 2, 4, 8 are supported, with other values
 * reserved.
 */
type nfields_range('q) = 'q > 0 & 'q <= 8
type nfields_range_pow2('q) = 'q in { 1, 2, 4, 8 }
type nfields = { 'q, nfields_range('q). int('q) }
/* Vector mask mapping */
mapping maybe_vmask : string <-> bits(1) = {
  ""             <-> 0b1, /* unmasked by default */
  sep() ^ "v0.t" <-> 0b0
}
/* Check for valid EEW and EMUL values in:
 * 1. vector widening/narrowing instructions
 * 2. vector load/store instructions
 */
val valid_eew_emul : (int, int) -> bool
function valid_eew_emul(EEW, EMUL_pow) = {
  let ELEN = 2 ^ get_elen_pow();
  EEW >= 8 & EEW <= ELEN & EMUL_pow >= -3 & EMUL_pow <= 3
}
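/* Worked example, assuming ELEN = 64 (get_elen_pow() = 6):
 * valid_eew_emul(32, -2) holds since 8 <= 32 <= 64 and -3 <= -2 <= 3, while
 * valid_eew_emul(128, 0) fails because EEW exceeds ELEN.
 */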
/* Check for valid vtype setting:
 * 1. If the vill bit is set, then any attempt to execute a vector instruction
 *    that depends upon vtype will raise an illegal instruction exception.
 * 2. vset{i}vl{i} and whole-register loads, stores, and moves do not depend
 *    upon vtype.
 */
val valid_vtype : unit -> bool
function valid_vtype() = {
  vtype[vill] == 0b0
}
/* Check that vstart matches the expected element index */
val assert_vstart : int -> bool
function assert_vstart(i) = {
  unsigned(vstart) == i
}
/* Check for valid destination register when vector masking is enabled:
 * The destination vector register group for a masked vector instruction
 * cannot overlap the source mask register (v0),
 * unless the destination vector register is being written with a mask value (e.g., compares)
 * or the scalar result of a reduction.
 */
val valid_rd_mask : (vregidx, bits(1)) -> bool
function valid_rd_mask(rd, vm) = {
  vm != 0b0 | rd != zvreg
}
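/* Worked example: a masked instruction (vm = 0b0) with vd = v0 fails this
 * check because the destination group would overlap the mask register;
 * the same encoding with vm = 0b1 passes regardless of vd.
 */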
/* Check for valid register overlap in vector widening/narrowing instructions:
 * In a widening instruction, overlap with the destination register group is
 * valid only if it falls in the highest-numbered part of that group and the
 * source EMUL is at least 1.
 * In a narrowing instruction, the overlap is valid only in the lowest-numbered
 * part of the source register group.
 */
val valid_reg_overlap : (vregidx, vregidx, int, int) -> bool
function valid_reg_overlap(rs, rd, EMUL_pow_rs, EMUL_pow_rd) = {
  let rs_group = if EMUL_pow_rs > 0 then 2 ^ EMUL_pow_rs else 1;
  let rd_group = if EMUL_pow_rd > 0 then 2 ^ EMUL_pow_rd else 1;
  let rs_int = unsigned(vregidx_bits(rs));
  let rd_int = unsigned(vregidx_bits(rd));
  if EMUL_pow_rs < EMUL_pow_rd then {
    (rs_int + rs_group <= rd_int) | (rs_int >= rd_int + rd_group) |
    ((rs_int + rs_group == rd_int + rd_group) & (EMUL_pow_rs >= 0))
  } else if EMUL_pow_rs > EMUL_pow_rd then {
    (rd_int <= rs_int) | (rd_int >= rs_int + rs_group)
  } else true;
}
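/* Worked example (widening, EMUL_pow_rs = 0, EMUL_pow_rd = 1): for rd = v2
 * (group v2-v3), rs = v3 overlaps only the highest-numbered part and
 * rs_int + rs_group == rd_int + rd_group (4 == 4) with EMUL_pow_rs >= 0,
 * so the overlap is valid; rs = v2 satisfies none of the disjuncts and is
 * rejected.
 */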
/* Check for valid register grouping in vector segment load/store instructions:
 * The EMUL of load vd or store vs3 times the number of fields per segment
 * must not be larger than 8. (EMUL * NFIELDS <= 8)
 */
val valid_segment : (nfields, int) -> bool
function valid_segment(nf, EMUL_pow) = {
  if EMUL_pow < 0 then nf / (2 ^ (0 - EMUL_pow)) <= 8
  else nf * 2 ^ EMUL_pow <= 8
}
/* ******************************************************************************* */
/* The following functions summarize patterns of illegal instruction check. */
/* ******************************************************************************* */
/* a. Normal check including vtype.vill field and vd/v0 overlap if vm = 0 */
val illegal_normal : (vregidx, bits(1)) -> bool
function illegal_normal(vd, vm) = {
  not(valid_vtype()) | not(valid_rd_mask(vd, vm))
}
/* b. Masked check for instructions encoded with vm = 0 */
val illegal_vd_masked : vregidx -> bool
function illegal_vd_masked(vd) = {
  not(valid_vtype()) | vd == zvreg
}
/* c. Unmasked check for:
 * 1. instructions encoded with vm = 1
 * 2. instructions with scalar rd: vcpop.m, vfirst.m
 * 3. vd as mask register (eew = 1):
 *    vmadc.vvm/vxm/vim, vmsbc.vvm/vxm, mask logical, integer compare, vlm.v, vsm.v
 */
val illegal_vd_unmasked : unit -> bool
function illegal_vd_unmasked() = {
  not(valid_vtype())
}
/* d. Variable width check for:
 * 1. integer/fixed-point widening/narrowing instructions
 * 2. vector integer extension: vzext, vsext
 */
val illegal_variable_width : (vregidx, bits(1), int, int) -> bool
function illegal_variable_width(vd, vm, SEW_new, LMUL_pow_new) = {
  not(valid_vtype()) | not(valid_rd_mask(vd, vm)) | not(valid_eew_emul(SEW_new, LMUL_pow_new))
}
/* e. Normal check for reduction instructions:
 * The destination vector register can overlap the source operands, including the mask register.
 * Vector reduction operations raise an illegal instruction exception if vstart is non-zero.
 */
val illegal_reduction : unit -> bool
function illegal_reduction() = {
  not(valid_vtype()) | not(assert_vstart(0))
}
/* f. Variable width check for widening reduction instructions */
val illegal_reduction_widen : (int, int) -> bool
function illegal_reduction_widen(SEW_widen, LMUL_pow_widen) = {
  not(valid_vtype()) | not(assert_vstart(0)) | not(valid_eew_emul(SEW_widen, LMUL_pow_widen))
}
/* g. Non-indexed load instruction check */
val illegal_load : (vregidx, bits(1), nfields, int, int) -> bool
function illegal_load(vd, vm, nf, EEW, EMUL_pow) = {
  not(valid_vtype()) | not(valid_rd_mask(vd, vm)) |
  not(valid_eew_emul(EEW, EMUL_pow)) | not(valid_segment(nf, EMUL_pow))
}
/* h. Non-indexed store instruction check (with vs3 rather than vd) */
val illegal_store : (nfields, int, int) -> bool
function illegal_store(nf, EEW, EMUL_pow) = {
  not(valid_vtype()) | not(valid_eew_emul(EEW, EMUL_pow)) | not(valid_segment(nf, EMUL_pow))
}
/* i. Indexed load instruction check */
val illegal_indexed_load : (vregidx, bits(1), nfields, int, int, int) -> bool
function illegal_indexed_load(vd, vm, nf, EEW_index, EMUL_pow_index, EMUL_pow_data) = {
  not(valid_vtype()) | not(valid_rd_mask(vd, vm)) |
  not(valid_eew_emul(EEW_index, EMUL_pow_index)) | not(valid_segment(nf, EMUL_pow_data))
}
/* j. Indexed store instruction check (with vs3 rather than vd) */
val illegal_indexed_store : (nfields, int, int, int) -> bool
function illegal_indexed_store(nf, EEW_index, EMUL_pow_index, EMUL_pow_data) = {
  not(valid_vtype()) | not(valid_eew_emul(EEW_index, EMUL_pow_index)) |
  not(valid_segment(nf, EMUL_pow_data))
}
/* Scalar register shaping */
val get_scalar : forall 'm, 'm >= 8. (regidx, int('m)) -> bits('m)
function get_scalar(rs1, SEW) = {
  if SEW <= xlen then {
    /* Least significant SEW bits */
    X(rs1)[SEW - 1 .. 0]
  } else {
    /* Sign extend to SEW */
    sign_extend(SEW, X(rs1))
  }
}
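/* Worked example: on RV64 (xlen = 64), get_scalar(rs1, 16) returns
 * X(rs1)[15 .. 0]; on RV32, get_scalar(rs1, 64) sign-extends the 32-bit
 * register value to 64 bits.
 */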
/* Extracts 4 consecutive vector elements starting from index 4*i */
val get_velem_quad : forall 'n 'm 'p, 'n > 0 & 'm > 0 & 'p >= 0 & 4 * 'p + 3 < 'n. (vector('n, bits('m)), int('p)) -> bits(4 * 'm)
function get_velem_quad(v, i) = v[4 * i + 3] @ v[4 * i + 2] @ v[4 * i + 1] @ v[4 * i]
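/* Worked example: for i = 0 the result is v[3] @ v[2] @ v[1] @ v[0], so
 * element 0 occupies the least significant 'm bits of the returned value.
 */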
/* Divide the input bitvector into 4 equal slices and store them in vd starting at position 4*i */
val write_velem_quad : forall 'p 'n 'm, 8 <= 'n <= 64 & 'm > 0 & 'p >= 0. (vregidx, int('n), bits('m), int('p)) -> unit
function write_velem_quad(vd, SEW, input, i) = {
  foreach (j from 0 to 3)
    write_single_element(SEW, 4 * i + j, vd, slice(input, j * SEW, SEW));
}
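/* Worked example: with SEW = 8 and input = 0xDDCCBBAA, elements 4*i .. 4*i+3
 * of vd are written with 0xAA, 0xBB, 0xCC and 0xDD respectively, mirroring
 * the packing order of get_velem_quad above.
 */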
/* Extracts 4 consecutive vector elements starting from index 4*i and returns a vector */
val get_velem_quad_vec : forall 'n 'm 'p, 'n > 0 & 8 <= 'm <= 64 & 'p >= 0 & 4 * 'p + 3 < 'n. (vector('n, bits('m)), int('p)) -> vector(4, bits('m))
function get_velem_quad_vec(v, i) = [ v[4 * i + 3], v[4 * i + 2], v[4 * i + 1], v[4 * i] ]
/* Writes each of the 4 elements from the input vector to the vector register vd, starting at position 4 * i */
val write_velem_quad_vec : forall 'p 'n, 8 <= 'n <= 64 & 'p >= 0. (vregidx, int('n), vector(4, bits('n)), int('p)) -> unit
function write_velem_quad_vec(vd, SEW, input, i) = {
  foreach (j from 0 to 3)
    write_single_element(SEW, 4 * i + j, vd, input[j]);
}
/* Get the starting element index from CSR vstart */
val get_start_element : unit -> result(nat, unit)
function get_start_element() = {
  let start_element = unsigned(vstart);
  let VLEN_pow = get_vlen_pow();
  let SEW_pow = get_sew_pow();
  /* The use of vstart values greater than the largest element
   * index for the current SEW setting is reserved.
   *
   * TODO: the bound here might be incorrect.
   * See https://github.com/riscv/sail-riscv/pull/755#discussion_r2035095825
   */
  if start_element > (2 ^ (3 + VLEN_pow - SEW_pow) - 1)
  then Err(())
  else Ok(start_element)
}
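/* Worked example, assuming VLEN = 128 (VLEN_pow = 7) and SEW = 8 (SEW_pow = 3):
 * the largest accepted vstart is 2 ^ (3 + 7 - 3) - 1 = 127, i.e. VLMAX - 1 at
 * LMUL = 8; any larger vstart returns Err(()).
 */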
/* Get the ending element index from csr vl */
val get_end_element : unit -> int
function get_end_element() = unsigned(vl) - 1
/* Mask handling; creates a pre-masked result vector for vstart, vl, vta/vma, and vm */
/* vm should be baked into vm_val from doing read_vmask */
/* tail masking when lmul < 1 is handled in write_vreg */
/* Returns two vectors:
 * vector1 is the result vector with values applied to masked elements
 * vector2 is a "mask" vector that is true for an element if the corresponding element
 * in the result vector should be updated by the calling instruction
 */
val init_masked_result : forall 'n 'm 'p, 'n >= 0 & 'm >= 0. (int('n), int('m), int('p), vector('n, bits('m)), bits('n)) -> result((vector('n, bits('m)), bits('n)), unit)
function init_masked_result(num_elem, SEW, LMUL_pow, vd_val, vm_val) = {
  let start_element : nat = match get_start_element() {
    Ok(v)   => v,
    Err(()) => return Err(())
  };
  let end_element = get_end_element();
  let tail_ag : agtype = get_vtype_vta();
  let mask_ag : agtype = get_vtype_vma();
  var mask : bits('n) = undefined;
  var result : vector('n, bits('m)) = undefined;
  /* Determine the actual number of elements when lmul < 1 */
  let real_num_elem = if LMUL_pow >= 0 then num_elem else num_elem / (2 ^ (0 - LMUL_pow));
  assert(num_elem >= real_num_elem);
  foreach (i from 0 to (num_elem - 1)) {
    if i < start_element then {
      /* Prestart elements defined by vstart */
      result[i] = vd_val[i];
      mask[i] = bitzero
    } else if i > end_element then {
      /* Tail elements defined by vl */
      result[i] = match tail_ag {
        UNDISTURBED => vd_val[i],
        AGNOSTIC    => vd_val[i] /* TODO: configuration support */
      };
      mask[i] = bitzero
    } else if i >= real_num_elem then {
      /* Tail elements defined by lmul < 1 */
      result[i] = match tail_ag {
        UNDISTURBED => vd_val[i],
        AGNOSTIC    => vd_val[i] /* TODO: configuration support */
      };
      mask[i] = bitzero
    } else if vm_val[i] == bitzero then {
      /* Inactive body elements defined by vm */
      result[i] = match mask_ag {
        UNDISTURBED => vd_val[i],
        AGNOSTIC    => vd_val[i] /* TODO: configuration support */
      };
      mask[i] = bitzero
    } else {
      /* Active body elements */
      mask[i] = bitone;
    }
  };
  Ok((result, mask))
}
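/* Worked example: with num_elem = 8, vstart = 1, vl = 6, LMUL_pow >= 0 and
 * vm_val = 0b00111110, element 0 is prestart, elements 6-7 are tail, and
 * elements 1-5 are active body elements (mask[i] = bitone); every other
 * element keeps vd_val[i] and has mask[i] = bitzero.
 */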
/* For instructions like vector reduction and vector store,
 * masks on prestart, inactive and tail elements only affect the validation of
 * source register elements (vs3 for store and vs2 for reduction). There's no
 * destination register to be masked. In these cases, this function can be
 * called to simply get the mask vector for vs (without the prepared vd result
 * vector).
 */
val init_masked_source : forall 'n 'p, 'n > 0. (int('n), int('p), bits('n)) -> result(bits('n), unit)
function init_masked_source(num_elem, LMUL_pow, vm_val) = {
  let start_element : nat = match get_start_element() {
    Ok(v)   => v,
    Err(()) => return Err(())
  };
  let end_element = get_end_element();
  var mask : bits('n) = undefined;
  /* Determine the actual number of elements when lmul < 1 */
  let real_num_elem = if LMUL_pow >= 0 then num_elem else num_elem / (2 ^ (0 - LMUL_pow));
  assert(num_elem >= real_num_elem);
  foreach (i from 0 to (num_elem - 1)) {
    if i < start_element then {
      /* Prestart elements defined by vstart */
      mask[i] = bitzero
    } else if i > end_element then {
      /* Tail elements defined by vl */
      mask[i] = bitzero
    } else if i >= real_num_elem then {
      /* Tail elements defined by lmul < 1 */
      mask[i] = bitzero
    } else if vm_val[i] == bitzero then {
      /* Inactive body elements defined by vm */
      mask[i] = bitzero
    } else {
      /* Active body elements */
      mask[i] = bitone;
    }
  };
  Ok(mask)
}
/* Mask handling for carry functions that use masks as input/output */
/* Only prestart and tail elements are masked in a mask value */
val init_masked_result_carry : forall 'n 'm 'p, 'n >= 0. (int('n), int('m), int('p), bits('n)) -> result((bits('n), bits('n)), unit)
function init_masked_result_carry(num_elem, SEW, LMUL_pow, vd_val) = {
  let start_element : nat = match get_start_element() {
    Ok(v)   => v,
    Err(()) => return Err(())
  };
  let end_element = get_end_element();
  var mask : bits('n) = undefined;
  var result : bits('n) = undefined;
  /* Determine the actual number of elements when lmul < 1 */
  let real_num_elem = if LMUL_pow >= 0 then num_elem else num_elem / (2 ^ (0 - LMUL_pow));
  assert(num_elem >= real_num_elem);
  foreach (i from 0 to (num_elem - 1)) {
    if i < start_element then {
      /* Prestart elements defined by vstart */
      result[i] = vd_val[i];
      mask[i] = bitzero
    } else if i > end_element then {
      /* Tail elements defined by vl; mask tail is always agnostic */
      result[i] = vd_val[i]; /* TODO: configuration support */
      mask[i] = bitzero
    } else if i >= real_num_elem then {
      /* Tail elements defined by lmul < 1; mask tail is always agnostic */
      result[i] = vd_val[i]; /* TODO: configuration support */
      mask[i] = bitzero
    } else {
      /* Active body elements */
      mask[i] = bitone
    }
  };
  Ok((result, mask))
}
/* Mask handling for cmp functions that use masks as output */
val init_masked_result_cmp : forall 'n 'm 'p, 'n >= 0. (int('n), int('m), int('p), bits('n), bits('n)) -> result((bits('n), bits('n)), unit)
function init_masked_result_cmp(num_elem, SEW, LMUL_pow, vd_val, vm_val) = {
  let start_element : nat = match get_start_element() {
    Ok(v)   => v,
    Err(()) => return Err(())
  };
  let end_element = get_end_element();
  let mask_ag : agtype = get_vtype_vma();
  var mask : bits('n) = undefined;
  var result : bits('n) = undefined;
  /* Determine the actual number of elements when lmul < 1 */
  let real_num_elem = if LMUL_pow >= 0 then num_elem else num_elem / (2 ^ (0 - LMUL_pow));
  assert(num_elem >= real_num_elem);
  foreach (i from 0 to (num_elem - 1)) {
    if i < start_element then {
      /* Prestart elements defined by vstart */
      result[i] = vd_val[i];
      mask[i] = bitzero
    } else if i > end_element then {
      /* Tail elements defined by vl; mask tail is always agnostic */
      result[i] = vd_val[i]; /* TODO: configuration support */
      mask[i] = bitzero
    } else if i >= real_num_elem then {
      /* Tail elements defined by lmul < 1; mask tail is always agnostic */
      result[i] = vd_val[i]; /* TODO: configuration support */
      mask[i] = bitzero
    } else if vm_val[i] == bitzero then {
      /* Inactive body elements defined by vm */
      result[i] = match mask_ag {
        UNDISTURBED => vd_val[i],
        AGNOSTIC    => vd_val[i] /* TODO: configuration support */
      };
      mask[i] = bitzero
    } else {
      /* Active body elements */
      mask[i] = bitone
    }
  };
  Ok((result, mask))
}
/* For vector load/store segment instructions:
 * Reads multiple register groups and concatenates their elements in parallel.
 * The fields of each segment (i.e., the elements sharing the same element index)
 * are packed together, with field 0 in the least significant bits.
 */
val read_vreg_seg : forall 'n 'm 'p 'q, 'n >= 0 & 'm >= 0 & nfields_range('q). (int('n), int('m), int('p), int('q), vregidx) -> vector('n, bits('q * 'm))
function read_vreg_seg(num_elem, SEW, LMUL_pow, nf, vrid) = {
  let LMUL_reg : int = if LMUL_pow <= 0 then 1 else 2 ^ LMUL_pow;
  var vreg_list : vector('q, vector('n, bits('m))) = vector_init(vector_init(zeros()));
  var result : vector('n, bits('q * 'm)) = vector_init(zeros());
  foreach (j from 0 to (nf - 1)) {
    vreg_list[j] = read_vreg(num_elem, SEW, LMUL_pow, vregidx_offset(vrid, to_bits(5, j * LMUL_reg)));
  };
  foreach (i from 0 to (num_elem - 1)) {
    result[i] = zeros('q * 'm);
    foreach (j from 0 to (nf - 1)) {
      result[i] = result[i] | (zero_extend(vreg_list[j][i]) << (j * 'm))
    }
  };
  result
}
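/* Worked example: with nf = 2 and SEW = 8, result[i] packs the two fields of
 * segment i as vreg_list[1][i] @ vreg_list[0][i], leaving field 0 in the low
 * byte of each bits(16) entry.
 */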
/* Shift amounts */
val get_shift_amount : forall 'n 'm, 0 <= 'n & 'm in {8, 16, 32, 64}. (bits('n), int('m)) -> nat
function get_shift_amount(bit_val, SEW) = {
  let lowlog2bits = log2(SEW);
  assert(0 < lowlog2bits & lowlog2bits < 'n);
  unsigned(bit_val[lowlog2bits - 1 .. 0]);
}
/* Fixed point rounding increment */
val get_fixed_rounding_incr : forall ('m 'n : Int), ('m > 0 & 'n >= 0). (bits('m), int('n)) -> bits(1)
function get_fixed_rounding_incr(vec_elem, shift_amount) = {
  if shift_amount == 0 then 0b0
  else {
    let rounding_mode = vcsr[vxrm];
    match rounding_mode {
      0b00 => slice(vec_elem, shift_amount - 1, 1),
      0b01 => bool_to_bits(
                (slice(vec_elem, shift_amount - 1, 1) == 0b1) &
                (slice(vec_elem, 0, shift_amount - 1) != zeros() | slice(vec_elem, shift_amount, 1) == 0b1)),
      0b10 => 0b0,
      0b11 => bool_to_bits(
                not(slice(vec_elem, shift_amount, 1) == 0b1) & (slice(vec_elem, 0, shift_amount) != zeros()))
    }
  }
}
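/* Worked example: with vxrm = 0b00 (round-to-nearest-up), vec_elem = 0b0110
 * and shift_amount = 2, the increment is bit 1 of vec_elem (0b1), so the
 * rounded shift gives (0b0110 >> 2) + 1 = 2. With vxrm = 0b01
 * (round-to-nearest-even) and vec_elem = 0b0010, the increment is 0b0: the
 * discarded bits are exactly half an ULP and the truncated result 0 is
 * already even.
 */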
/* Fixed point unsigned saturation */
val unsigned_saturation : forall ('m 'n: Int), ('n >= 'm > 1). (int('m), bits('n)) -> bits('m)
function unsigned_saturation(len, elem) = {
  if unsigned(elem) > unsigned(ones('m)) then {
    vcsr[vxsat] = 0b1;
    ones('m)
  } else {
    elem['m - 1 .. 0]
  }
}
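/* Worked example: for 'm = 8, a 9-bit elem of 0b111111111 (511) exceeds
 * unsigned(ones(8)) = 255, so the result saturates to 0xFF and vxsat is set;
 * 0b011111111 (255) passes through as 0xFF with vxsat untouched.
 */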
/* Fixed point signed saturation */
val signed_saturation : forall ('m 'n: Int), ('n >= 'm > 1). (int('m), bits('n)) -> bits('m)
function signed_saturation(len, elem) = {
  if signed(elem) > signed(0b0 @ ones('m - 1)) then {
    vcsr[vxsat] = 0b1;
    0b0 @ ones('m - 1)
  } else if signed(elem) < signed(0b1 @ zeros('m - 1)) then {
    vcsr[vxsat] = 0b1;
    0b1 @ zeros('m - 1)
  } else {
    elem['m - 1 .. 0]
  };
}
/* Count the number of leading zeros in the low 'len' bits of 'sig', where
 * 'len' is the significand field width of a half, single, or double
 * precision floating-point value.
 */
val count_leadingzeros : (bits(64), int) -> int
function count_leadingzeros(sig, len) = {
  var idx : int = -1;
  assert(len == 10 | len == 23 | len == 52);
  foreach (i from 0 to (len - 1)) {
    if sig[i] == bitone then idx = i;
  };
  len - idx - 1
}