// Copyright 2021 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
//
// Author: Matteo Perotti <mperotti@iis.ee.ethz.ch>

#include "vector_macros.h"

9+ // Positive-stride tests
10+ void TEST_CASE1 (void ) {
11+ VSET (4 , e8 , m1 );
12+ volatile uint8_t INP1 [] = {0x9f , 0xe4 , 0x19 , 0x20 , 0x8f , 0x2e , 0x05 , 0xe0 ,
13+ 0xf9 , 0xaa , 0x71 , 0xf0 , 0xc3 , 0x94 , 0xbb , 0xd3 };
14+ uint64_t stride = 3 ;
15+ asm volatile ("vlse8.v v1, (%0), %1" ::"r" (INP1 ), "r" (stride ));
16+ VCMP_U8 (1 , v1 , 0x9f , 0x20 , 0x05 , 0xaa );
17+ }
18+
19+ void TEST_CASE2 (void ) {
20+ VSET (4 , e16 , m1 );
21+ volatile uint16_t INP1 [] = {0x9fe4 , 0x1920 , 0x8f2e , 0x05e0 ,
22+ 0xf9aa , 0x71f0 , 0xc394 , 0xbbd3 };
23+ uint64_t stride = 4 ;
24+ asm volatile ("vlse16.v v1, (%0), %1" ::"r" (INP1 ), "r" (stride ));
25+ VCMP_U16 (2 , v1 , 0x9fe4 , 0x8f2e , 0xf9aa , 0xc394 );
26+ }
27+
28+ void TEST_CASE3 (void ) {
29+ VSET (4 , e32 , m1 );
30+ volatile uint32_t INP1 [] = {0x9fe41920 , 0x8f2e05e0 , 0xf9aa71f0 , 0xc394bbd3 ,
31+ 0xa11a9384 , 0xa7163840 , 0x99991348 , 0xa9f38cd1 };
32+ uint64_t stride = 8 ;
33+ asm volatile ("vlse32.v v1, (%0), %1" ::"r" (INP1 ), "r" (stride ));
34+ VCMP_U32 (3 , v1 , 0x9fe41920 , 0xf9aa71f0 , 0xa11a9384 , 0x99991348 );
35+ }
36+
37+ void TEST_CASE4 (void ) {
38+ VSET (4 , e64 , m1 );
39+ volatile uint64_t INP1 [] = {0x9fe419208f2e05e0 , 0xf9aa71f0c394bbd3 ,
40+ 0xa11a9384a7163840 , 0x99991348a9f38cd1 };
41+ uint64_t stride = 8 ;
42+ asm volatile ("vlse64.v v1, (%0), %1" ::"r" (INP1 ), "r" (stride ));
43+ VCMP_U64 (4 , v1 , 0x9fe419208f2e05e0 , 0xf9aa71f0c394bbd3 , 0xa11a9384a7163840 ,
44+ 0x99991348a9f38cd1 );
45+ }
46+
47+ // Zero-stride tests
48+ // The implementation must perform all the memory accesses
49+ void TEST_CASE5 (void ) {
50+ VSET (16 , e8 , m1 );
51+ volatile uint8_t INP1 [] = {0x9f };
52+ uint64_t stride = 0 ;
53+ asm volatile ("vlse8.v v1, (%0), %1" ::"r" (INP1 ), "r" (stride ));
54+ VCMP_U8 (5 , v1 , 0x9f , 0x9f , 0x9f , 0x9f , 0x9f , 0x9f , 0x9f , 0x9f , 0x9f , 0x9f ,
55+ 0x9f , 0x9f , 0x9f , 0x9f , 0x9f , 0x9f );
56+ }
57+
58+ // The implementation can also perform fewer accesses
59+ void TEST_CASE6 (void ) {
60+ VSET (16 , e8 , m1 );
61+ volatile uint8_t INP1 [] = {0x9f };
62+ asm volatile ("vlse8.v v1, (%0), x0" ::"r" (INP1 ));
63+ VCMP_U8 (6 , v1 , 0x9f , 0x9f , 0x9f , 0x9f , 0x9f , 0x9f , 0x9f , 0x9f , 0x9f , 0x9f ,
64+ 0x9f , 0x9f , 0x9f , 0x9f , 0x9f , 0x9f );
65+ }
66+
67+ // Different LMUL
68+ void TEST_CASE7 (void ) {
69+ VSET (8 , e64 , m2 );
70+ volatile uint64_t INP1 [] = {0x9fa831c7a11a9384 };
71+ asm volatile ("vlse64.v v2, (%0), x0" ::"r" (INP1 ));
72+ VCMP_U64 (7 , v2 , 0x9fa831c7a11a9384 , 0x9fa831c7a11a9384 , 0x9fa831c7a11a9384 ,
73+ 0x9fa831c7a11a9384 , 0x9fa831c7a11a9384 , 0x9fa831c7a11a9384 ,
74+ 0x9fa831c7a11a9384 , 0x9fa831c7a11a9384 );
75+ }
76+
77+ // Others
78+ // Negative-stride test
79+ void TEST_CASE8 (void ) {
80+ VSET (4 , e16 , m1 );
81+ volatile uint16_t INP1 [] = {0x9fe4 , 0x1920 , 0x8f2e , 0x05e0 ,
82+ 0xf9aa , 0x71f0 , 0xc394 , 0xbbd3 };
83+ uint64_t stride = -4 ;
84+ asm volatile ("vlse16.v v1, (%0), %1" ::"r" (& INP1 [7 ]), "r" (stride ));
85+ VCMP_U16 (8 , v1 , 0xbbd3 , 0x71f0 , 0x05e0 , 0x1920 );
86+ }
87+
88+ // Stride greater than default Ara AXI width == 128-bit (4 lanes)
89+ void TEST_CASE9 (void ) {
90+ VSET (2 , e64 , m1 );
91+ volatile uint64_t INP1 [] = {0x99991348a9f38cd1 , 0x9fa831c7a11a9384 ,
92+ 0x9fa831c7a11a9384 , 0x9fa831c7a11a9384 ,
93+ 0x9fa831c7a11a9384 , 0x01015ac1309bb678 };
94+ uint64_t stride = 40 ;
95+ asm volatile ("vlse64.v v1, (%0), %1" ::"r" (INP1 ), "r" (stride ));
96+ VCMP_U64 (9 , v1 , 0x99991348a9f38cd1 , 0x01015ac1309bb678 );
97+ }
98+
99+ // Fill Ara internal Load Buffer
100+ void TEST_CASE10 (void ) {
101+ VSET (8 , e64 , m1 );
102+ volatile uint64_t INP1 [] = {
103+ 0x9fe419208f2e05e0 , 0xf9aa71f0c394bbd3 , 0xa11a9384a7163840 ,
104+ 0x99991348a9f38cd1 , 0x9fa831c7a11a9384 , 0x3819759853987548 ,
105+ 0x1893179501093489 , 0x81937598aa819388 , 0x1874754791888188 ,
106+ 0x3eeeeeeee33111ae , 0x9013930148815808 , 0xab8b914891484891 ,
107+ 0x9031850931584902 , 0x3189759837598759 , 0x8319599991911111 ,
108+ 0x8913984898951989 };
109+ uint64_t stride = 16 ;
110+ asm volatile ("vlse64.v v1, (%0), %1" ::"r" (INP1 ), "r" (stride ));
111+ VCMP_U64 (10 , v1 , 0x9fe419208f2e05e0 , 0xa11a9384a7163840 , 0x9fa831c7a11a9384 ,
112+ 0x1893179501093489 , 0x1874754791888188 , 0x9013930148815808 ,
113+ 0x9031850931584902 , 0x8319599991911111 );
114+ }
115+
116+ // Masked stride loads
117+ void TEST_CASE11 (void ) {
118+ VSET (4 , e8 , m1 );
119+ volatile uint8_t INP1 [] = {0x9f , 0xe4 , 0x19 , 0x20 , 0x8f , 0x2e , 0x05 , 0xe0 ,
120+ 0xf9 , 0xaa , 0x71 , 0xf0 , 0xc3 , 0x94 , 0xbb , 0xd3 };
121+ uint64_t stride = 3 ;
122+ VLOAD_8 (v0 , 0xAA );
123+ VCLEAR (v1 );
124+ asm volatile ("vlse8.v v1, (%0), %1, v0.t" ::"r" (INP1 ), "r" (stride ));
125+ VCMP_U8 (11 , v1 , 0x00 , 0x20 , 0x00 , 0xaa );
126+ }
127+
128+ void TEST_CASE12 (void ) {
129+ VSET (4 , e16 , m1 );
130+ volatile uint16_t INP1 [] = {0x9fe4 , 0x1920 , 0x8f2e , 0x05e0 ,
131+ 0xf9aa , 0x71f0 , 0xc394 , 0xbbd3 };
132+ uint64_t stride = 4 ;
133+ VLOAD_8 (v0 , 0xAA );
134+ VCLEAR (v1 );
135+ asm volatile ("vlse16.v v1, (%0), %1, v0.t" ::"r" (INP1 ), "r" (stride ));
136+ VCMP_U16 (12 , v1 , 0 , 0x8f2e , 0 , 0xc394 );
137+ }
138+
139+ void TEST_CASE13 (void ) {
140+ VSET (4 , e32 , m1 );
141+ volatile uint32_t INP1 [] = {0x9fe41920 , 0x8f2e05e0 , 0xf9aa71f0 , 0xc394bbd3 ,
142+ 0xa11a9384 , 0xa7163840 , 0x99991348 , 0xa9f38cd1 };
143+ uint64_t stride = 8 ;
144+ VLOAD_8 (v0 , 0xAA );
145+ VCLEAR (v1 );
146+ asm volatile ("vlse32.v v1, (%0), %1, v0.t" ::"r" (INP1 ), "r" (stride ));
147+ VCMP_U32 (13 , v1 , 0 , 0xf9aa71f0 , 0 , 0x99991348 );
148+ }
149+
150+ void TEST_CASE14 (void ) {
151+ VSET (8 , e64 , m1 );
152+ volatile uint64_t INP1 [] = {
153+ 0x9fe419208f2e05e0 , 0xf9aa71f0c394bbd3 , 0xa11a9384a7163840 ,
154+ 0x99991348a9f38cd1 , 0x9fa831c7a11a9384 , 0x3819759853987548 ,
155+ 0x1893179501093489 , 0x81937598aa819388 , 0x1874754791888188 ,
156+ 0x3eeeeeeee33111ae , 0x9013930148815808 , 0xab8b914891484891 ,
157+ 0x9031850931584902 , 0x3189759837598759 , 0x8319599991911111 ,
158+ 0x8913984898951989 };
159+ uint64_t stride = 16 ;
160+ VLOAD_8 (v0 , 0xAA );
161+ VCLEAR (v1 );
162+ asm volatile ("vlse64.v v1, (%0), %1, v0.t" ::"r" (INP1 ), "r" (stride ));
163+ VCMP_U64 (14 , v1 , 0 , 0xa11a9384a7163840 , 0 , 0x1893179501093489 , 0 ,
164+ 0x9013930148815808 , 0 , 0x8319599991911111 );
165+ }
166+
167+ int main (void ) {
168+ INIT_CHECK ();
169+ enable_vec ();
170+
171+ TEST_CASE1 ();
172+ TEST_CASE2 ();
173+ TEST_CASE3 ();
174+ TEST_CASE4 ();
175+
176+ TEST_CASE5 ();
177+ TEST_CASE6 ();
178+ TEST_CASE7 ();
179+
180+ TEST_CASE8 ();
181+ TEST_CASE9 ();
182+ TEST_CASE10 ();
183+
184+ // TEST_CASE11();
185+ // TEST_CASE12();
186+ // TEST_CASE13();
187+ // TEST_CASE14();
188+
189+ EXIT_CHECK ();
190+ }