Skip to content

Commit 685e83f

Browse files
konstantinschwarz, Abnikant Singh, gbossu, khallouh, katerynamuts
authored and committed
[AIE2P] Add initial AIE2P/Strix support
co-authored-by: Abnikant Singh <[email protected]> co-authored-by: Gaetan Bossu <[email protected]> co-authored-by: Hamza Khallouki <[email protected]> co-authored-by: Kateryna Muts <[email protected]> co-authored-by: Konstantin Schwarz <[email protected]> co-authored-by: Marc Ludevid Wulf <[email protected]> co-authored-by: Niwin Anto <[email protected]> co-authored-by: Sagar Maheshwari <[email protected]>
1 parent 8dd91f4 commit 685e83f

File tree

829 files changed

+346255
-18390
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

829 files changed

+346255
-18390
lines changed

README.md

+22-4
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,28 @@
22

33
This repository extends the LLVM framework to generate code for use with AMD/Xilinx AI Engine processors.
44

5-
| Architecture | clang/LLVM target | Low-level Intrinsic API | High-Level Intrinsic API (AIE_API) | Architecture Manual |
6-
| --- | --- | --- | --- | --- |
7-
| XDNA (Phoenix, Hawk Point) | --target=aie2-none-unknown-elf | [Link](https://www.xilinx.com/htmldocs/xilinx2024_1/aiengine_ml_intrinsics/intrinsics/) | [Link](https://www.xilinx.com/htmldocs/xilinx2024_1/aiengine_api/aie_api/doc/index.html) | [Link](https://docs.amd.com/r/en-US/am020-versal-aie-ml)
8-
| XDNA2 (Strix Point) | coming soon | coming soon | coming soon | coming soon
5+
<table>
6+
<thead>
7+
<tr>
8+
<th>Architecture</th>
9+
<th>clang/LLVM target</th>
10+
<th>High-Level Intrinsic API (AIE_API)</th>
11+
</tr>
12+
</thead>
13+
<tbody>
14+
<tr>
15+
<td>XDNA (Phoenix, Hawk Point)</td>
16+
<td>--target=aie2-none-unknown-elf</td>
17+
<td rowspan=2>
18+
<a href="https://www.xilinx.com/htmldocs/xilinx2024_2/aiengine_api/aie_api/doc/index.html">Link</a>
19+
</td>
20+
</tr>
21+
<tr>
22+
<td>XDNA2 (Strix Point)</td>
23+
<td>--target=aie2p-none-unknown-elf</td>
24+
</tr>
25+
</tbody>
26+
</table>
927

1028
## Architecture Overview
1129

clang/cmake/caches/Peano-AIE-runtime-libraries.cmake

+5-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ set(LLVM_FORCE_BUILD_RUNTIME "libc" CACHE STRING "")
1919
# deposits libc, libm, crt into lib/<target-triple> (e.g. lib/aie2-none-unknown-elf) instead of lib/
2020
set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ON CACHE BOOL "")
2121

22-
set(LLVM_BUILTIN_TARGETS "aie-none-unknown-elf;aie2-none-unknown-elf" CACHE STRING "")
22+
set(LLVM_BUILTIN_TARGETS "aie-none-unknown-elf;aie2-none-unknown-elf;aie2p-none-unknown-elf" CACHE STRING "")
2323
set(LLVM_RUNTIME_TARGETS "${LLVM_BUILTIN_TARGETS}" CACHE STRING "")
2424

2525
foreach(target ${LLVM_BUILTIN_TARGETS})
@@ -60,6 +60,10 @@ foreach(target ${LLVM_BUILTIN_TARGETS})
6060
set(RUNTIMES_${target}_LIBCXX_EXTRA_SITE_DEFINES "_LIBCPP_REMOVE_TRANSITIVE_INCLUDES" CACHE STRING "")
6161

6262
set(RUNTIMES_${target}_LIBC_ENABLE_USE_BY_CLANG ON CACHE STRING "")
63+
# LIBC includes C++ sources, which by default trigger inclusion of the standard libc++ headers.
64+
# However, these are not available while building libc, so disable the include explicitly.
65+
set(RUNTIMES_${target}_LIBC_COMPILE_OPTIONS_DEFAULT "-nostdinc++" CACHE STRING "")
66+
6367
# configure libcxxabi build
6468
set(RUNTIMES_${target}_LIBCXXABI_ENABLE_SHARED OFF CACHE STRING "")
6569
set(RUNTIMES_${target}_LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE STRING "")

clang/cmake/caches/Peano-AIE.cmake

+2
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ set(_llvm_distribution_components
5252
clang-resource-headers
5353
builtins-aie2-none-unknown-elf
5454
runtimes-aie2-none-unknown-elf
55+
builtins-aie2p-none-unknown-elf
56+
runtimes-aie2p-none-unknown-elf
5557
${LLVM_TOOLCHAIN_TOOLS})
5658

5759
option(LLVM_BUILD_LLVM_DYLIB "" OFF)

clang/include/clang/Basic/BuiltinsAIE.def

+2
Original file line numberDiff line numberDiff line change
@@ -539,5 +539,7 @@ BUILTIN(__builtin_aiev2_sparse_pop_16_bfloat_insert_hi, "vV32y&LLLi&V32y&LLLi&i*
539539
BUILTIN(__builtin_aiev2_sparse_peek_16_bfloat_and_get_pointer, "vV32y&LLLi&i*", "nc")
540540
BUILTIN(__builtin_aiev2_sparse_peek_16_bfloat_set_lo, "vV32y&LLLi&i*", "nc")
541541
BUILTIN(__builtin_aiev2_sparse_peek_16_bfloat_insert_hi, "vV32y&LLLi&V32y&LLLi&i*", "nc")
542+
// aie2p
543+
#include "BuiltinsAIE2P.def"
542544

543545
#undef BUILTIN
+302
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,302 @@
1+
//==- BuiltinsAIE2P.def - AIE Builtin function database ----*- C++ -*-==//
2+
//
3+
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
// (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
8+
//
9+
//===----------------------------------------------------------------------===//
10+
//
11+
// This file defines the AIE2P-specific builtin function database. Users of
12+
// this file must define the BUILTIN macro to make use of this information.
13+
//
14+
//===----------------------------------------------------------------------===//
15+
16+
// The format of this database matches clang/Basic/Builtins.def.
17+
// In addition to the default type specifiers (second argument to the macro),
18+
// we support the following identifiers on AIE:
19+
// n -> acc32
20+
// e -> acc48
21+
// m -> acc64
22+
// g -> accfloat
23+
24+
BUILTIN(__builtin_aie2p_event, "vi", "nc")
25+
//ext
26+
BUILTIN(__builtin_aiev2p_ext_I32_I64,"iV2ii","nc")
27+
//set
28+
BUILTIN(__builtin_aiev2p_set_I64_I32,"V2iii","nc")
29+
//upd
30+
BUILTIN(__builtin_aiev2p_upd_I64_I32,"V2iV2iii","nc")
31+
// Pack-Unpack
32+
BUILTIN(__builtin_aie2p_pack_I512_I8_I16, "V32cV32si", "nc")
33+
BUILTIN(__builtin_aie2p_pack_I512_I4_I8, "V32cV64ci", "nc")
34+
BUILTIN(__builtin_aie2p_unpack_I512_I16_I8, "V32sV32ci", "nc")
35+
BUILTIN(__builtin_aie2p_unpack_I512_I8_I4, "V64cV32ci", "nc")
36+
BUILTIN(__builtin_aie2p_pack_I1024_I8_I16, "V64cV64si", "nc")
37+
BUILTIN(__builtin_aie2p_pack_I1024_I4_I8, "V64cV128ci", "nc")
38+
BUILTIN(__builtin_aie2p_unpack_I1024_I16_I8, "V64sV64ci", "nc")
39+
BUILTIN(__builtin_aie2p_unpack_I1024_I8_I4, "V128cV64ci", "nc")
40+
//mac-mul
41+
BUILTIN(__builtin_aie2p_ACC2048_add_conf,"V32mV32mV32mi", "nc")
42+
BUILTIN(__builtin_aie2p_ACC2048_accfloat_add_conf,"V64gV64gV64gi", "nc")
43+
BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_addmac_conf,"V32mV32iV64sV32mV32mi", "nc")
44+
BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_bf_addmac_conf,"V64gV64yV64yV64gV64gi", "nc")
45+
BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_addmsc_conf,"V32mV32iV64sV32mV32mi", "nc")
46+
BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_bf_addmsc_conf,"V64gV64yV64yV64gV64gi", "nc")
47+
BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_bf_mac_conf,"V64gV64yV64yV64gi", "nc")
48+
BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_bf_mul_conf,"V64gV64yV64yi", "nc")
49+
BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_bf_msc_conf,"V64gV64yV64yV64gi", "nc")
50+
BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_bf_negmul_conf,"V64gV64yV64yi", "nc")
51+
BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_mac_conf,"V32mV32iV32iV32mi", "nc")
52+
BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_mul_conf,"V32mV32iV32ii", "nc")
53+
BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_msc_conf,"V32mV32iV32iV32mi", "nc")
54+
BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_negmul_conf,"V32mV32iV32ii", "nc")
55+
BUILTIN(__builtin_aie2p_I1024_I512_ACC2048_addmac_conf,"V32mV32iV32sV32mV32mi", "nc")
56+
BUILTIN(__builtin_aie2p_I1024_I512_ACC2048_addmsc_conf,"V32mV32iV32sV32mV32mi", "nc")
57+
BUILTIN(__builtin_aie2p_I1024_I512_ACC2048_mac_conf,"V32mV32iV32sV32mi", "nc")
58+
BUILTIN(__builtin_aie2p_I1024_I512_ACC2048_mul_conf,"V32mV32iV32si", "nc")
59+
BUILTIN(__builtin_aie2p_I1024_I512_ACC2048_msc_conf,"V32mV32iV32sV32mi", "nc")
60+
BUILTIN(__builtin_aie2p_I1024_I512_ACC2048_negmul_conf,"V32mV32iV32si", "nc")
61+
BUILTIN(__builtin_aie2p_I512_I1024_ACC2048_addmac_conf,"V32mV16iV64sV32mV32mi", "nc")
62+
BUILTIN(__builtin_aie2p_I512_I1024_ACC2048_addmsc_conf,"V32mV16iV64sV32mV32mi", "nc")
63+
BUILTIN(__builtin_aie2p_I512_I1024_ACC2048_mac_conf,"V32mV16iV64sV32mi", "nc")
64+
BUILTIN(__builtin_aie2p_I512_I1024_ACC2048_mul_conf,"V32mV16iV64si", "nc")
65+
BUILTIN(__builtin_aie2p_I512_I1024_ACC2048_msc_conf,"V32mV16iV64sV32mi", "nc")
66+
BUILTIN(__builtin_aie2p_I512_I1024_ACC2048_negmul_conf,"V32mV16iV64si", "nc")
67+
BUILTIN(__builtin_aie2p_I512_I512_ACC1024_addmac_conf,"V16mV32sV32sV16mV16mi", "nc")
68+
BUILTIN(__builtin_aie2p_I512_I512_ACC1024_bf_addmac_conf,"V32gV32yV32yV32gV32gi", "nc")
69+
BUILTIN(__builtin_aie2p_I512_I512_ACC1024_addmsc_conf,"V16mV32sV32sV16mV16mi", "nc")
70+
BUILTIN(__builtin_aie2p_I512_I512_ACC1024_bf_addmsc_conf,"V32gV32yV32yV32gV32gi", "nc")
71+
BUILTIN(__builtin_aie2p_I512_I512_ACC1024_bf_mac_conf,"V32gV32yV32yV32gi", "nc")
72+
BUILTIN(__builtin_aie2p_I512_I512_ACC1024_bf_mul_conf,"V32gV32yV32yi", "nc")
73+
BUILTIN(__builtin_aie2p_I512_I512_ACC1024_mac_conf,"V16mV32sV32sV16mi", "nc")
74+
BUILTIN(__builtin_aie2p_I512_I512_ACC1024_mul_conf,"V16mV32sV32si", "nc")
75+
BUILTIN(__builtin_aie2p_I512_I512_ACC1024_bf_msc_conf,"V32gV32yV32yV32gi", "nc")
76+
BUILTIN(__builtin_aie2p_I512_I512_ACC1024_bf_negmul_conf,"V32gV32yV32yi", "nc")
77+
BUILTIN(__builtin_aie2p_I512_I512_ACC1024_msc_conf,"V16mV32sV32sV16mi", "nc")
78+
BUILTIN(__builtin_aie2p_I512_I512_ACC1024_negmul_conf,"V16mV32sV32si", "nc")
79+
BUILTIN(__builtin_aie2p_I512_I512_ACC2048_addmac_conf,"V32mV16iV32sV32mV32mi", "nc")
80+
BUILTIN(__builtin_aie2p_I512_I512_ACC2048_bf_addmac_conf,"V64gV32yV32yV64gV64gi", "nc")
81+
BUILTIN(__builtin_aie2p_I512_I512_ACC2048_addmsc_conf,"V32mV16iV32sV32mV32mi", "nc")
82+
BUILTIN(__builtin_aie2p_I512_I512_ACC2048_bf_addmsc_conf,"V64gV32yV32yV64gV64gi", "nc")
83+
BUILTIN(__builtin_aie2p_I512_I512_ACC2048_bf_mac_conf,"V64gV32yV32yV64gi", "nc")
84+
BUILTIN(__builtin_aie2p_I512_I512_ACC2048_bf_mul_conf,"V64gV32yV32yi", "nc")
85+
BUILTIN(__builtin_aie2p_I512_I512_ACC2048_mac_conf,"V32mV16iV32sV32mi", "nc")
86+
BUILTIN(__builtin_aie2p_I512_I512_ACC2048_mul_conf,"V32mV16iV32si", "nc")
87+
BUILTIN(__builtin_aie2p_I512_I512_ACC2048_bf_msc_conf,"V64gV32yV32yV64gi", "nc")
88+
BUILTIN(__builtin_aie2p_I512_I512_ACC2048_bf_negmul_conf,"V64gV32yV32yi", "nc")
89+
BUILTIN(__builtin_aie2p_I512_I512_ACC2048_msc_conf,"V32mV16iV32sV32mi", "nc")
90+
BUILTIN(__builtin_aie2p_I512_I512_ACC2048_negmul_conf,"V32mV16iV32si", "nc")
91+
BUILTIN(__builtin_aie2p_I512_I512_ACC512_bf_addmac_conf,"V16gV32yV32yV16gV16gi", "nc")
92+
BUILTIN(__builtin_aie2p_I512_I512_ACC512_bf_addmsc_conf,"V16gV32yV32yV16gV16gi", "nc")
93+
BUILTIN(__builtin_aie2p_I512_I512_ACC512_bf_mac_conf,"V16gV32yV32yV16gi", "nc")
94+
BUILTIN(__builtin_aie2p_I512_I512_ACC512_bf_mul_conf,"V16gV32yV32yi", "nc")
95+
BUILTIN(__builtin_aie2p_I512_I512_ACC512_bf_msc_conf,"V16gV32yV32yV16gi", "nc")
96+
BUILTIN(__builtin_aie2p_I512_I512_ACC512_bf_negmul_conf,"V16gV32yV32yi", "nc")
97+
BUILTIN(__builtin_aie2p_ACC2048_accfloat_neg_conf,"V64gV64gi", "nc")
98+
BUILTIN(__builtin_aie2p_ACC2048_neg_conf,"V32mV32mi", "nc")
99+
BUILTIN(__builtin_aie2p_ACC2048_sub_conf,"V32mV32mV32mi", "nc")
100+
BUILTIN(__builtin_aie2p_ACC2048_accfloat_sub_conf,"V64gV64gV64gi", "nc")
101+
//srs
102+
BUILTIN(__builtin_aie2p_I256_v16_acc32_srs, "V16sV16nii", "nc")
103+
BUILTIN(__builtin_aie2p_I256_v16_acc64_srs, "V16sV16mii", "nc")
104+
BUILTIN(__builtin_aie2p_I256_v32_acc32_srs, "V32cV32nii", "nc")
105+
BUILTIN(__builtin_aie2p_I256_v8_acc64_srs, "V8iV8mii", "nc")
106+
BUILTIN(__builtin_aie2p_I512_v16_acc64_srs, "V16iV16mii", "nc")
107+
BUILTIN(__builtin_aie2p_I512_v32_acc32_srs, "V32sV32nii", "nc")
108+
BUILTIN(__builtin_aie2p_I512_v32_acc64_srs, "V32sV32mii", "nc")
109+
BUILTIN(__builtin_aie2p_I512_v64_acc32_srs, "V64cV64nii", "nc")
110+
//ups
111+
BUILTIN(__builtin_aie2p_acc32_v16_I256_ups, "V16nV16sii", "nc")
112+
BUILTIN(__builtin_aie2p_acc32_v32_I256_ups, "V32nV32cii", "nc")
113+
BUILTIN(__builtin_aie2p_acc32_v32_I512_ups, "V32nV32sii", "nc")
114+
BUILTIN(__builtin_aie2p_acc64_v16_I256_ups, "V16mV16sii", "nc")
115+
BUILTIN(__builtin_aie2p_acc64_v16_I512_ups, "V16mV16iii", "nc")
116+
BUILTIN(__builtin_aie2p_acc64_v8_I256_ups, "V8mV8iii", "nc")
117+
BUILTIN(__builtin_aie2p_acc32_v64_I512_ups, "V64nV64cii", "nc")
118+
BUILTIN(__builtin_aie2p_acc64_v32_I512_ups, "V32mV32sii", "nc")
119+
// accfloat to bfloat16 conversions and vice-versa (v16 and v32 variants)
120+
BUILTIN(__builtin_aie2p_v16accfloat_to_v16bf16, "V16yV16g", "nc")
121+
BUILTIN(__builtin_aie2p_v16bf16_to_v16accfloat, "V16gV16y", "nc")
122+
BUILTIN(__builtin_aie2p_v32accfloat_to_v32bf16, "V32yV32g", "nc")
123+
BUILTIN(__builtin_aie2p_v32bf16_to_v32accfloat, "V32gV32y", "nc")
124+
// accfloat to float conversions and vice-versa (v16accfloat <-> v8float, v32accfloat <-> v16float)
125+
BUILTIN(__builtin_aie2p_v16accfloat_to_v8float, "V8fV16g", "nc")
126+
BUILTIN(__builtin_aie2p_v8float_to_v16accfloat, "V16gV8f", "nc")
127+
BUILTIN(__builtin_aie2p_v32accfloat_to_v16float, "V16fV32g", "nc")
128+
BUILTIN(__builtin_aie2p_v16float_to_v32accfloat, "V32gV16f", "nc")
129+
// Mode Settings
130+
// Set Control Registers
131+
BUILTIN(__builtin_aie2p_set_ctrl_reg, "vUiCUi", "nc")
132+
// Get Control Registers
133+
BUILTIN(__builtin_aie2p_get_ctrl_reg, "UiUi", "nc")
134+
// Set Status Registers
135+
BUILTIN(__builtin_aie2p_set_status_reg, "vUiCUi", "nc")
136+
// Get Status Registers
137+
BUILTIN(__builtin_aie2p_get_status_reg, "UiUi", "nc")
138+
// Get Core ID
139+
BUILTIN(__builtin_aie2p_get_coreid, "i", "nc")
140+
//bitcounting
141+
BUILTIN(__builtin_aie2p_clb, "UiUi", "nc")
142+
//semaphores
143+
BUILTIN(__builtin_aie2p_acquire, "vUiUi", "nc")
144+
BUILTIN(__builtin_aie2p_acquire_cond, "vUiUii", "nc")
145+
BUILTIN(__builtin_aie2p_release, "vUii", "nc")
146+
BUILTIN(__builtin_aie2p_release_cond, "vUiii", "nc")
147+
BUILTIN(__builtin_aie2p_done, "v", "n")
148+
// Scheduling barrier
149+
BUILTIN(__builtin_aie2p_sched_barrier, "v", "n")
150+
//addr
151+
BUILTIN(__builtin_aie2p_add_2d, "v*v*iiii&", "nc")
152+
BUILTIN(__builtin_aie2p_add_3d, "v*v*iiiii&ii&", "nc")
153+
154+
BUILTIN(__builtin_aie2p_load_4x16_lo, "V8iV8i", "nc")
155+
BUILTIN(__builtin_aie2p_load_4x16_hi, "V8iV8i", "nc")
156+
BUILTIN(__builtin_aie2p_load_4x32_lo, "V8iV8i", "nc")
157+
BUILTIN(__builtin_aie2p_load_4x32_hi, "V8iV8i", "nc")
158+
BUILTIN(__builtin_aie2p_load_4x64_lo, "V8iV8i", "nc")
159+
BUILTIN(__builtin_aie2p_load_4x64_hi, "V8iV8i", "nc")
160+
161+
//scl2vec::shift-shiftx-shift_bytes
162+
BUILTIN(__builtin_aie2p_vshift_I512_I512, "V16iV16iV16iii", "nc")
163+
BUILTIN(__builtin_aie2p_vshift_bf512_bf512, "V32yV32yV32yii", "nc")
164+
//scl2vec::insert
165+
BUILTIN(__builtin_aie2p_vinsert_bf32_bf512,"V32yV32yiV2y", "nc")
166+
BUILTIN(__builtin_aie2p_vinsert_bf64_bf512,"V32yV32yiV4y", "nc")
167+
BUILTIN(__builtin_aie2p_vinsert32_accfloat,"V16gV16gif", "nc")
168+
//scl2vec::broadcast:broadcast_zero:broadcast_one
169+
BUILTIN(__builtin_aie2p_vbroadcast_bf32_bf512, "V32yV2y", "nc")
170+
BUILTIN(__builtin_aie2p_vbroadcast_bf64_bf512, "V32yV4y", "nc")
171+
BUILTIN(__builtin_aie2p_vbroadcast_zero_acc1024, "V16m", "nc")
172+
//scl2vec::broadcast_elem
173+
BUILTIN(__builtin_aie2p_vextract_broadcast_bf32_bf512, "V32yV32yi", "nc")
174+
BUILTIN(__builtin_aie2p_vextract_broadcast128_I512, "V16iV16ii", "nc")
175+
//scl2vec::vshuffle
176+
BUILTIN(__builtin_aie2p_vshuffle,"V16iV16iV16iUi", "nc")
177+
178+
//scl2vec::vbcstshfl
179+
BUILTIN(__builtin_aie2p_vbcst_shuffle8,"V16iiUi", "nc")
180+
BUILTIN(__builtin_aie2p_vbcst_shuffle16,"V16iiUi", "nc")
181+
BUILTIN(__builtin_aie2p_vbcst_shuffle32,"V16iiUi", "nc")
182+
BUILTIN(__builtin_aie2p_vbcst_shuffle64,"V16iV2iUi", "nc")
183+
//scl2vec::ext_elem
184+
BUILTIN(__builtin_aie2p_vextract_elem8_I512, "iV64cii", "nc")
185+
BUILTIN(__builtin_aie2p_vextract_elem16_I512, "iV32sii", "nc")
186+
BUILTIN(__builtin_aie2p_vextract_elem32_I512, "iV16iii", "nc")
187+
BUILTIN(__builtin_aie2p_vextract_elem64_I512, "V2iV16iii", "nc")
188+
189+
// Streams
190+
// Cascade stream read
191+
BUILTIN(__builtin_aie2p_scd_read_vec, "V16ii", "nc")
192+
BUILTIN(__builtin_aie2p_scd_read_acc32, "V16ni", "nc")
193+
BUILTIN(__builtin_aie2p_scd_expand_lo, "V32ni", "nc")
194+
BUILTIN(__builtin_aie2p_scd_expand_hi, "V32ni", "nc")
195+
196+
// Cascade stream write
197+
BUILTIN(__builtin_aie2p_mcd_write_vec, "vV16ii", "nc")
198+
BUILTIN(__builtin_aie2p_mcd_write_acc32, "vV16ni", "nc")
199+
200+
// Scalar stream read
201+
BUILTIN(__builtin_aie2p_get_ss, "ii&", "nc")
202+
BUILTIN(__builtin_aie2p_get_ss_nb, "ii&", "nc")
203+
204+
// Scalar stream write
205+
BUILTIN(__builtin_aie2p_put_ms, "vii", "nc")
206+
BUILTIN(__builtin_aie2p_put_ms_nb, "viii&", "nc")
207+
208+
//Read data into selected accumulator lanes from cascade stream.
209+
BUILTIN(__builtin_aie2p_scd_ACC2048, "V64nii", "nc")
210+
BUILTIN(__builtin_aie2p_scd_expand_ACC1024, "V32nii", "nc")
211+
BUILTIN(__builtin_aie2p_scd_expand_ACC2048, "V64nii", "nc")
212+
BUILTIN(__builtin_aie2p_scd_expand_ACC1024_incr, "V32niv*i&", "nc")
213+
BUILTIN(__builtin_aie2p_scd_expand_ACC2048_incr, "V64niv*i&", "nc")
214+
// vabs_gtz
215+
BUILTIN(__builtin_aie2p_vabs_gtz8, "V64cV64ciUWi&", "nc")
216+
BUILTIN(__builtin_aie2p_vabs_gtz16, "V32sV32siUi&", "nc")
217+
BUILTIN(__builtin_aie2p_vabs_gtz32, "V16iV16iiUi&", "nc")
218+
219+
// vaddsub
220+
BUILTIN(__builtin_aie2p_vaddsub8, "V64cV64cV64cV2i", "nc")
221+
BUILTIN(__builtin_aie2p_vaddsub16, "V32sV32sV32sUi", "nc")
222+
BUILTIN(__builtin_aie2p_vaddsub32, "V16iV16iV16iUi", "nc")
223+
224+
// vbneg_ltz
225+
BUILTIN(__builtin_aie2p_vbneg_ltz8, "V64cV64cUWi&", "nc")
226+
BUILTIN(__builtin_aie2p_vbneg_ltz16, "V32sV32sUi&", "nc")
227+
BUILTIN(__builtin_aie2p_vbneg_ltz32, "V16iV16iUi&", "nc")
228+
229+
// veqz
230+
BUILTIN(__builtin_aie2p_veqz8, "V2iV64c", "nc")
231+
BUILTIN(__builtin_aie2p_veqz16, "UiV32s", "nc")
232+
BUILTIN(__builtin_aie2p_veqz32, "UiV16i", "nc")
233+
234+
// vge
235+
BUILTIN(__builtin_aie2p_vge8, "V2iV64cV64ci", "nc")
236+
BUILTIN(__builtin_aie2p_vge16, "UiV32sV32si", "nc")
237+
BUILTIN(__builtin_aie2p_vge32, "UiV16iV16ii", "nc")
238+
BUILTIN(__builtin_aie2p_vgebf16, "UiV32yV32y", "nc")
239+
240+
// vlt
241+
BUILTIN(__builtin_aie2p_vlt8, "V2iV64cV64ci", "nc")
242+
BUILTIN(__builtin_aie2p_vlt16, "UiV32sV32si", "nc")
243+
BUILTIN(__builtin_aie2p_vlt32, "UiV16iV16ii", "nc")
244+
BUILTIN(__builtin_aie2p_vltbf16, "UiV32yV32y", "nc")
245+
246+
// vmaxdiff_lt
247+
BUILTIN(__builtin_aie2p_vmaxdiff_lt8, "V64cV64cV64ciUWi&", "nc")
248+
BUILTIN(__builtin_aie2p_vmaxdiff_lt16, "V32sV32sV32siUi&", "nc")
249+
BUILTIN(__builtin_aie2p_vmaxdiff_lt32, "V16iV16iV16iiUi&", "nc")
250+
251+
// vmax_lt
252+
BUILTIN(__builtin_aie2p_vmax_lt8, "V64cV64cV64ciUWi&", "nc")
253+
BUILTIN(__builtin_aie2p_vmax_lt16, "V32sV32sV32siUi&", "nc")
254+
BUILTIN(__builtin_aie2p_vmax_lt32, "V16iV16iV16iiUi&", "nc")
255+
BUILTIN(__builtin_aie2p_vmax_ltbf16, "V32yV32yV32yUi&", "nc")
256+
257+
// vmin_ge
258+
BUILTIN(__builtin_aie2p_vmin_ge8, "V64cV64cV64ciUWi&", "nc")
259+
BUILTIN(__builtin_aie2p_vmin_ge16, "V32sV32sV32siUi&", "nc")
260+
BUILTIN(__builtin_aie2p_vmin_ge32, "V16iV16iV16iiUi&", "nc")
261+
BUILTIN(__builtin_aie2p_vmin_gebf16, "V32yV32yV32yUi&", "nc")
262+
263+
// vneg_gtz
264+
BUILTIN(__builtin_aie2p_vneg_gtz8, "V64cV64cUWi&", "nc")
265+
BUILTIN(__builtin_aie2p_vneg_gtz16, "V32sV32sUi&", "nc")
266+
BUILTIN(__builtin_aie2p_vneg_gtz32, "V16iV16iUi&", "nc")
267+
268+
// vsel
269+
BUILTIN(__builtin_aie2p_vsel8, "V64cV64cV64cV2i", "nc")
270+
BUILTIN(__builtin_aie2p_vsel16, "V32sV32sV32sUi", "nc")
271+
BUILTIN(__builtin_aie2p_vsel32, "V16iV16iV16iUi", "nc")
272+
273+
// vsub_ge
274+
BUILTIN(__builtin_aie2p_vsub_ge8, "V64cV64cV64ciUWi&", "nc")
275+
BUILTIN(__builtin_aie2p_vsub_ge16, "V32sV32sV32siUi&", "nc")
276+
BUILTIN(__builtin_aie2p_vsub_ge32, "V16iV16iV16iiUi&", "nc")
277+
278+
// vsub_lt
279+
BUILTIN(__builtin_aie2p_vsub_lt8, "V64cV64cV64ciUWi&", "nc")
280+
BUILTIN(__builtin_aie2p_vsub_lt16, "V32sV32sV32siUi&", "nc")
281+
BUILTIN(__builtin_aie2p_vsub_lt32, "V16iV16iV16iiUi&", "nc")
282+
283+
// vector bfloat16 to vector int
284+
BUILTIN(__builtin_aie2p_v16bf16_to_v16i32, "V16iV16yi", "nc")
285+
286+
// Read/Write for Tile Memory Map
287+
BUILTIN(__builtin_aie2p_read_tm, "iv*", "nc")
288+
BUILTIN(__builtin_aie2p_write_tm, "viv*", "nc")
289+
290+
// flt2fx and fx2flt (float <-> fixed-point conversions)
291+
BUILTIN(__builtin_aie2p_fx2flt, "fii", "nc")
292+
BUILTIN(__builtin_aie2p_flt2fx, "ifi", "nc")
293+
294+
// non-linear fp ops
295+
BUILTIN(__builtin_aie2p_sqrtf, "ff", "nc")
296+
BUILTIN(__builtin_aie2p_inv, "ff", "nc")
297+
BUILTIN(__builtin_aie2p_invsqrt, "ff", "nc")
298+
BUILTIN(__builtin_aie2p_exp2, "V16yV16g", "nc")
299+
BUILTIN(__builtin_aie2p_tanh, "V16yV16g", "nc")
300+
301+
//division/mod
302+
BUILTIN(__builtin_aie2p_divstep, "vUi&Ui&Ui", "nc")

clang/lib/Basic/Targets.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ std::unique_ptr<TargetInfo> AllocateTarget(const llvm::Triple &Triple,
126126

127127
case llvm::Triple::aie:
128128
case llvm::Triple::aie2:
129+
case llvm::Triple::aie2p:
129130
return std::make_unique<AIETargetInfo>(Triple, Opts);
130131

131132
case llvm::Triple::xcore:

0 commit comments

Comments
 (0)