|
| 1 | +//==- BuiltinsAIE2P.def - AIE2P Builtin function database --*- C++ -*-==// |
| 2 | +// |
| 3 | +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +// (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates |
| 8 | +// |
| 9 | +//===----------------------------------------------------------------------===// |
| 10 | +// |
| 11 | +// This file defines the AIE2P-specific builtin function database. Users of |
| 12 | +// this file must define the BUILTIN macro to make use of this information. |
| 13 | +// |
| 14 | +//===----------------------------------------------------------------------===// |
| 15 | + |
| 16 | +// The format of this database matches clang/Basic/Builtins.def. |
| 17 | +// In addition to the default type specifiers (second argument to the macro), |
| 18 | +// we support the following identifiers on AIE: |
| 19 | +// n -> acc32 |
| 20 | +// e -> acc48 |
| 21 | +// m -> acc64 |
| 22 | +// g -> accfloat |
| 23 | + |
| 24 | +BUILTIN(__builtin_aie2p_event, "vi", "nc") |
| 25 | +// ext -- NOTE(review): the ext/set/upd builtins below are spelled `__builtin_aiev2p_*`, while every other entry visible here is `__builtin_aie2p_*`; confirm the prefix is intentional. |
| 26 | +BUILTIN(__builtin_aiev2p_ext_I32_I64,"iV2ii","nc") |
| 27 | +// set |
| 28 | +BUILTIN(__builtin_aiev2p_set_I64_I32,"V2iii","nc") |
| 29 | +// upd |
| 30 | +BUILTIN(__builtin_aiev2p_upd_I64_I32,"V2iV2iii","nc") |
| 31 | +// Pack-Unpack |
| 32 | +BUILTIN(__builtin_aie2p_pack_I512_I8_I16, "V32cV32si", "nc") |
| 33 | +BUILTIN(__builtin_aie2p_pack_I512_I4_I8, "V32cV64ci", "nc") |
| 34 | +BUILTIN(__builtin_aie2p_unpack_I512_I16_I8, "V32sV32ci", "nc") |
| 35 | +BUILTIN(__builtin_aie2p_unpack_I512_I8_I4, "V64cV32ci", "nc") |
| 36 | +BUILTIN(__builtin_aie2p_pack_I1024_I8_I16, "V64cV64si", "nc") |
| 37 | +BUILTIN(__builtin_aie2p_pack_I1024_I4_I8, "V64cV128ci", "nc") |
| 38 | +BUILTIN(__builtin_aie2p_unpack_I1024_I16_I8, "V64sV64ci", "nc") |
| 39 | +BUILTIN(__builtin_aie2p_unpack_I1024_I8_I4, "V128cV64ci", "nc") |
| 40 | +//mac-mul |
| 41 | +BUILTIN(__builtin_aie2p_ACC2048_add_conf,"V32mV32mV32mi", "nc") |
| 42 | +BUILTIN(__builtin_aie2p_ACC2048_accfloat_add_conf,"V64gV64gV64gi", "nc") |
| 43 | +BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_addmac_conf,"V32mV32iV64sV32mV32mi", "nc") |
| 44 | +BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_bf_addmac_conf,"V64gV64yV64yV64gV64gi", "nc") |
| 45 | +BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_addmsc_conf,"V32mV32iV64sV32mV32mi", "nc") |
| 46 | +BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_bf_addmsc_conf,"V64gV64yV64yV64gV64gi", "nc") |
| 47 | +BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_bf_mac_conf,"V64gV64yV64yV64gi", "nc") |
| 48 | +BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_bf_mul_conf,"V64gV64yV64yi", "nc") |
| 49 | +BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_bf_msc_conf,"V64gV64yV64yV64gi", "nc") |
| 50 | +BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_bf_negmul_conf,"V64gV64yV64yi", "nc") |
| 51 | +BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_mac_conf,"V32mV32iV32iV32mi", "nc") |
| 52 | +BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_mul_conf,"V32mV32iV32ii", "nc") |
| 53 | +BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_msc_conf,"V32mV32iV32iV32mi", "nc") |
| 54 | +BUILTIN(__builtin_aie2p_I1024_I1024_ACC2048_negmul_conf,"V32mV32iV32ii", "nc") |
| 55 | +BUILTIN(__builtin_aie2p_I1024_I512_ACC2048_addmac_conf,"V32mV32iV32sV32mV32mi", "nc") |
| 56 | +BUILTIN(__builtin_aie2p_I1024_I512_ACC2048_addmsc_conf,"V32mV32iV32sV32mV32mi", "nc") |
| 57 | +BUILTIN(__builtin_aie2p_I1024_I512_ACC2048_mac_conf,"V32mV32iV32sV32mi", "nc") |
| 58 | +BUILTIN(__builtin_aie2p_I1024_I512_ACC2048_mul_conf,"V32mV32iV32si", "nc") |
| 59 | +BUILTIN(__builtin_aie2p_I1024_I512_ACC2048_msc_conf,"V32mV32iV32sV32mi", "nc") |
| 60 | +BUILTIN(__builtin_aie2p_I1024_I512_ACC2048_negmul_conf,"V32mV32iV32si", "nc") |
| 61 | +BUILTIN(__builtin_aie2p_I512_I1024_ACC2048_addmac_conf,"V32mV16iV64sV32mV32mi", "nc") |
| 62 | +BUILTIN(__builtin_aie2p_I512_I1024_ACC2048_addmsc_conf,"V32mV16iV64sV32mV32mi", "nc") |
| 63 | +BUILTIN(__builtin_aie2p_I512_I1024_ACC2048_mac_conf,"V32mV16iV64sV32mi", "nc") |
| 64 | +BUILTIN(__builtin_aie2p_I512_I1024_ACC2048_mul_conf,"V32mV16iV64si", "nc") |
| 65 | +BUILTIN(__builtin_aie2p_I512_I1024_ACC2048_msc_conf,"V32mV16iV64sV32mi", "nc") |
| 66 | +BUILTIN(__builtin_aie2p_I512_I1024_ACC2048_negmul_conf,"V32mV16iV64si", "nc") |
| 67 | +BUILTIN(__builtin_aie2p_I512_I512_ACC1024_addmac_conf,"V16mV32sV32sV16mV16mi", "nc") |
| 68 | +BUILTIN(__builtin_aie2p_I512_I512_ACC1024_bf_addmac_conf,"V32gV32yV32yV32gV32gi", "nc") |
| 69 | +BUILTIN(__builtin_aie2p_I512_I512_ACC1024_addmsc_conf,"V16mV32sV32sV16mV16mi", "nc") |
| 70 | +BUILTIN(__builtin_aie2p_I512_I512_ACC1024_bf_addmsc_conf,"V32gV32yV32yV32gV32gi", "nc") |
| 71 | +BUILTIN(__builtin_aie2p_I512_I512_ACC1024_bf_mac_conf,"V32gV32yV32yV32gi", "nc") |
| 72 | +BUILTIN(__builtin_aie2p_I512_I512_ACC1024_bf_mul_conf,"V32gV32yV32yi", "nc") |
| 73 | +BUILTIN(__builtin_aie2p_I512_I512_ACC1024_mac_conf,"V16mV32sV32sV16mi", "nc") |
| 74 | +BUILTIN(__builtin_aie2p_I512_I512_ACC1024_mul_conf,"V16mV32sV32si", "nc") |
| 75 | +BUILTIN(__builtin_aie2p_I512_I512_ACC1024_bf_msc_conf,"V32gV32yV32yV32gi", "nc") |
| 76 | +BUILTIN(__builtin_aie2p_I512_I512_ACC1024_bf_negmul_conf,"V32gV32yV32yi", "nc") |
| 77 | +BUILTIN(__builtin_aie2p_I512_I512_ACC1024_msc_conf,"V16mV32sV32sV16mi", "nc") |
| 78 | +BUILTIN(__builtin_aie2p_I512_I512_ACC1024_negmul_conf,"V16mV32sV32si", "nc") |
| 79 | +BUILTIN(__builtin_aie2p_I512_I512_ACC2048_addmac_conf,"V32mV16iV32sV32mV32mi", "nc") |
| 80 | +BUILTIN(__builtin_aie2p_I512_I512_ACC2048_bf_addmac_conf,"V64gV32yV32yV64gV64gi", "nc") |
| 81 | +BUILTIN(__builtin_aie2p_I512_I512_ACC2048_addmsc_conf,"V32mV16iV32sV32mV32mi", "nc") |
| 82 | +BUILTIN(__builtin_aie2p_I512_I512_ACC2048_bf_addmsc_conf,"V64gV32yV32yV64gV64gi", "nc") |
| 83 | +BUILTIN(__builtin_aie2p_I512_I512_ACC2048_bf_mac_conf,"V64gV32yV32yV64gi", "nc") |
| 84 | +BUILTIN(__builtin_aie2p_I512_I512_ACC2048_bf_mul_conf,"V64gV32yV32yi", "nc") |
| 85 | +BUILTIN(__builtin_aie2p_I512_I512_ACC2048_mac_conf,"V32mV16iV32sV32mi", "nc") |
| 86 | +BUILTIN(__builtin_aie2p_I512_I512_ACC2048_mul_conf,"V32mV16iV32si", "nc") |
| 87 | +BUILTIN(__builtin_aie2p_I512_I512_ACC2048_bf_msc_conf,"V64gV32yV32yV64gi", "nc") |
| 88 | +BUILTIN(__builtin_aie2p_I512_I512_ACC2048_bf_negmul_conf,"V64gV32yV32yi", "nc") |
| 89 | +BUILTIN(__builtin_aie2p_I512_I512_ACC2048_msc_conf,"V32mV16iV32sV32mi", "nc") |
| 90 | +BUILTIN(__builtin_aie2p_I512_I512_ACC2048_negmul_conf,"V32mV16iV32si", "nc") |
| 91 | +BUILTIN(__builtin_aie2p_I512_I512_ACC512_bf_addmac_conf,"V16gV32yV32yV16gV16gi", "nc") |
| 92 | +BUILTIN(__builtin_aie2p_I512_I512_ACC512_bf_addmsc_conf,"V16gV32yV32yV16gV16gi", "nc") |
| 93 | +BUILTIN(__builtin_aie2p_I512_I512_ACC512_bf_mac_conf,"V16gV32yV32yV16gi", "nc") |
| 94 | +BUILTIN(__builtin_aie2p_I512_I512_ACC512_bf_mul_conf,"V16gV32yV32yi", "nc") |
| 95 | +BUILTIN(__builtin_aie2p_I512_I512_ACC512_bf_msc_conf,"V16gV32yV32yV16gi", "nc") |
| 96 | +BUILTIN(__builtin_aie2p_I512_I512_ACC512_bf_negmul_conf,"V16gV32yV32yi", "nc") |
| 97 | +BUILTIN(__builtin_aie2p_ACC2048_accfloat_neg_conf,"V64gV64gi", "nc") |
| 98 | +BUILTIN(__builtin_aie2p_ACC2048_neg_conf,"V32mV32mi", "nc") |
| 99 | +BUILTIN(__builtin_aie2p_ACC2048_sub_conf,"V32mV32mV32mi", "nc") |
| 100 | +BUILTIN(__builtin_aie2p_ACC2048_accfloat_sub_conf,"V64gV64gV64gi", "nc") |
| 101 | +//srs |
| 102 | +BUILTIN(__builtin_aie2p_I256_v16_acc32_srs, "V16sV16nii", "nc") |
| 103 | +BUILTIN(__builtin_aie2p_I256_v16_acc64_srs, "V16sV16mii", "nc") |
| 104 | +BUILTIN(__builtin_aie2p_I256_v32_acc32_srs, "V32cV32nii", "nc") |
| 105 | +BUILTIN(__builtin_aie2p_I256_v8_acc64_srs, "V8iV8mii", "nc") |
| 106 | +BUILTIN(__builtin_aie2p_I512_v16_acc64_srs, "V16iV16mii", "nc") |
| 107 | +BUILTIN(__builtin_aie2p_I512_v32_acc32_srs, "V32sV32nii", "nc") |
| 108 | +BUILTIN(__builtin_aie2p_I512_v32_acc64_srs, "V32sV32mii", "nc") |
| 109 | +BUILTIN(__builtin_aie2p_I512_v64_acc32_srs, "V64cV64nii", "nc") |
| 110 | +//ups |
| 111 | +BUILTIN(__builtin_aie2p_acc32_v16_I256_ups, "V16nV16sii", "nc") |
| 112 | +BUILTIN(__builtin_aie2p_acc32_v32_I256_ups, "V32nV32cii", "nc") |
| 113 | +BUILTIN(__builtin_aie2p_acc32_v32_I512_ups, "V32nV32sii", "nc") |
| 114 | +BUILTIN(__builtin_aie2p_acc64_v16_I256_ups, "V16mV16sii", "nc") |
| 115 | +BUILTIN(__builtin_aie2p_acc64_v16_I512_ups, "V16mV16iii", "nc") |
| 116 | +BUILTIN(__builtin_aie2p_acc64_v8_I256_ups, "V8mV8iii", "nc") |
| 117 | +BUILTIN(__builtin_aie2p_acc32_v64_I512_ups, "V64nV64cii", "nc") |
| 118 | +BUILTIN(__builtin_aie2p_acc64_v32_I512_ups, "V32mV32sii", "nc") |
| 119 | +// accfloat <-> bfloat16 vector conversions (v16 and v32) |
| 120 | +BUILTIN(__builtin_aie2p_v16accfloat_to_v16bf16, "V16yV16g", "nc") |
| 121 | +BUILTIN(__builtin_aie2p_v16bf16_to_v16accfloat, "V16gV16y", "nc") |
| 122 | +BUILTIN(__builtin_aie2p_v32accfloat_to_v32bf16, "V32yV32g", "nc") |
| 123 | +BUILTIN(__builtin_aie2p_v32bf16_to_v32accfloat, "V32gV32y", "nc") |
| 124 | +// accfloat <-> float vector conversions (v16accfloat <-> v8float, v32accfloat <-> v16float) |
| 125 | +BUILTIN(__builtin_aie2p_v16accfloat_to_v8float, "V8fV16g", "nc") |
| 126 | +BUILTIN(__builtin_aie2p_v8float_to_v16accfloat, "V16gV8f", "nc") |
| 127 | +BUILTIN(__builtin_aie2p_v32accfloat_to_v16float, "V16fV32g", "nc") |
| 128 | +BUILTIN(__builtin_aie2p_v16float_to_v32accfloat, "V32gV16f", "nc") |
| 129 | +// Mode Settings |
| 130 | +// Set Control Registers |
| 131 | +BUILTIN(__builtin_aie2p_set_ctrl_reg, "vUiCUi", "nc") |
| 132 | +// Get Control Registers |
| 133 | +BUILTIN(__builtin_aie2p_get_ctrl_reg, "UiUi", "nc") |
| 134 | +// Set Status Registers |
| 135 | +BUILTIN(__builtin_aie2p_set_status_reg, "vUiCUi", "nc") |
| 136 | +// Get Status Registers |
| 137 | +BUILTIN(__builtin_aie2p_get_status_reg, "UiUi", "nc") |
| 138 | +// Get Core ID |
| 139 | +BUILTIN(__builtin_aie2p_get_coreid, "i", "nc") |
| 140 | +//bitcounting |
| 141 | +BUILTIN(__builtin_aie2p_clb, "UiUi", "nc") |
| 142 | +// semaphores -- NOTE(review): acquire/release are flagged "nc" while `done` at the end of this group is only "n"; in clang's Builtins.def attribute encoding 'c' marks a builtin as const. Confirm "c" is intended for acquire/release. |
| 143 | +BUILTIN(__builtin_aie2p_acquire, "vUiUi", "nc") |
| 144 | +BUILTIN(__builtin_aie2p_acquire_cond, "vUiUii", "nc") |
| 145 | +BUILTIN(__builtin_aie2p_release, "vUii", "nc") |
| 146 | +BUILTIN(__builtin_aie2p_release_cond, "vUiii", "nc") |
| 147 | +BUILTIN(__builtin_aie2p_done, "v", "n") |
| 148 | +// Scheduling barrier |
| 149 | +BUILTIN(__builtin_aie2p_sched_barrier, "v", "n") |
| 150 | +//addr |
| 151 | +BUILTIN(__builtin_aie2p_add_2d, "v*v*iiii&", "nc") |
| 152 | +BUILTIN(__builtin_aie2p_add_3d, "v*v*iiiii&ii&", "nc") |
| 153 | + |
| 154 | +BUILTIN(__builtin_aie2p_load_4x16_lo, "V8iV8i", "nc") |
| 155 | +BUILTIN(__builtin_aie2p_load_4x16_hi, "V8iV8i", "nc") |
| 156 | +BUILTIN(__builtin_aie2p_load_4x32_lo, "V8iV8i", "nc") |
| 157 | +BUILTIN(__builtin_aie2p_load_4x32_hi, "V8iV8i", "nc") |
| 158 | +BUILTIN(__builtin_aie2p_load_4x64_lo, "V8iV8i", "nc") |
| 159 | +BUILTIN(__builtin_aie2p_load_4x64_hi, "V8iV8i", "nc") |
| 160 | + |
| 161 | +//scl2vec::shift-shiftx-shift_bytes |
| 162 | +BUILTIN(__builtin_aie2p_vshift_I512_I512, "V16iV16iV16iii", "nc") |
| 163 | +BUILTIN(__builtin_aie2p_vshift_bf512_bf512, "V32yV32yV32yii", "nc") |
| 164 | +//scl2vec::insert |
| 165 | +BUILTIN(__builtin_aie2p_vinsert_bf32_bf512,"V32yV32yiV2y", "nc") |
| 166 | +BUILTIN(__builtin_aie2p_vinsert_bf64_bf512,"V32yV32yiV4y", "nc") |
| 167 | +BUILTIN(__builtin_aie2p_vinsert32_accfloat,"V16gV16gif", "nc") |
| 168 | +//scl2vec::broadcast:broadcast_zero:broadcast_one |
| 169 | +BUILTIN(__builtin_aie2p_vbroadcast_bf32_bf512, "V32yV2y", "nc") |
| 170 | +BUILTIN(__builtin_aie2p_vbroadcast_bf64_bf512, "V32yV4y", "nc") |
| 171 | +BUILTIN(__builtin_aie2p_vbroadcast_zero_acc1024, "V16m", "nc") |
| 172 | +//scl2vec::broadcast_elem |
| 173 | +BUILTIN(__builtin_aie2p_vextract_broadcast_bf32_bf512, "V32yV32yi", "nc") |
| 174 | +BUILTIN(__builtin_aie2p_vextract_broadcast128_I512, "V16iV16ii", "nc") |
| 175 | +//scl2vec::vshuffle |
| 176 | +BUILTIN(__builtin_aie2p_vshuffle,"V16iV16iV16iUi", "nc") |
| 177 | + |
| 178 | +//scl2vec::vbcstshfl |
| 179 | +BUILTIN(__builtin_aie2p_vbcst_shuffle8,"V16iiUi", "nc") |
| 180 | +BUILTIN(__builtin_aie2p_vbcst_shuffle16,"V16iiUi", "nc") |
| 181 | +BUILTIN(__builtin_aie2p_vbcst_shuffle32,"V16iiUi", "nc") |
| 182 | +BUILTIN(__builtin_aie2p_vbcst_shuffle64,"V16iV2iUi", "nc") |
| 183 | +//scl2vec::ext_elem |
| 184 | +BUILTIN(__builtin_aie2p_vextract_elem8_I512, "iV64cii", "nc") |
| 185 | +BUILTIN(__builtin_aie2p_vextract_elem16_I512, "iV32sii", "nc") |
| 186 | +BUILTIN(__builtin_aie2p_vextract_elem32_I512, "iV16iii", "nc") |
| 187 | +BUILTIN(__builtin_aie2p_vextract_elem64_I512, "V2iV16iii", "nc") |
| 188 | + |
| 189 | +// Streams |
| 190 | +// Cascade stream read |
| 191 | +BUILTIN(__builtin_aie2p_scd_read_vec, "V16ii", "nc") |
| 192 | +BUILTIN(__builtin_aie2p_scd_read_acc32, "V16ni", "nc") |
| 193 | +BUILTIN(__builtin_aie2p_scd_expand_lo, "V32ni", "nc") |
| 194 | +BUILTIN(__builtin_aie2p_scd_expand_hi, "V32ni", "nc") |
| 195 | + |
| 196 | +// Cascade stream write |
| 197 | +BUILTIN(__builtin_aie2p_mcd_write_vec, "vV16ii", "nc") |
| 198 | +BUILTIN(__builtin_aie2p_mcd_write_acc32, "vV16ni", "nc") |
| 199 | + |
| 200 | +// Scalar stream read |
| 201 | +BUILTIN(__builtin_aie2p_get_ss, "ii&", "nc") |
| 202 | +BUILTIN(__builtin_aie2p_get_ss_nb, "ii&", "nc") |
| 203 | + |
| 204 | +// Scalar stream write |
| 205 | +BUILTIN(__builtin_aie2p_put_ms, "vii", "nc") |
| 206 | +BUILTIN(__builtin_aie2p_put_ms_nb, "viii&", "nc") |
| 207 | + |
| 208 | +//Read data into selected accumulator lanes from cascade stream. |
| 209 | +BUILTIN(__builtin_aie2p_scd_ACC2048, "V64nii", "nc") |
| 210 | +BUILTIN(__builtin_aie2p_scd_expand_ACC1024, "V32nii", "nc") |
| 211 | +BUILTIN(__builtin_aie2p_scd_expand_ACC2048, "V64nii", "nc") |
| 212 | +BUILTIN(__builtin_aie2p_scd_expand_ACC1024_incr, "V32niv*i&", "nc") |
| 213 | +BUILTIN(__builtin_aie2p_scd_expand_ACC2048_incr, "V64niv*i&", "nc") |
| 214 | +// vabs_gtz |
| 215 | +BUILTIN(__builtin_aie2p_vabs_gtz8, "V64cV64ciUWi&", "nc") |
| 216 | +BUILTIN(__builtin_aie2p_vabs_gtz16, "V32sV32siUi&", "nc") |
| 217 | +BUILTIN(__builtin_aie2p_vabs_gtz32, "V16iV16iiUi&", "nc") |
| 218 | + |
| 219 | +// vaddsub |
| 220 | +BUILTIN(__builtin_aie2p_vaddsub8, "V64cV64cV64cV2i", "nc") |
| 221 | +BUILTIN(__builtin_aie2p_vaddsub16, "V32sV32sV32sUi", "nc") |
| 222 | +BUILTIN(__builtin_aie2p_vaddsub32, "V16iV16iV16iUi", "nc") |
| 223 | + |
| 224 | +// vbneg_ltz |
| 225 | +BUILTIN(__builtin_aie2p_vbneg_ltz8, "V64cV64cUWi&", "nc") |
| 226 | +BUILTIN(__builtin_aie2p_vbneg_ltz16, "V32sV32sUi&", "nc") |
| 227 | +BUILTIN(__builtin_aie2p_vbneg_ltz32, "V16iV16iUi&", "nc") |
| 228 | + |
| 229 | +// veqz |
| 230 | +BUILTIN(__builtin_aie2p_veqz8, "V2iV64c", "nc") |
| 231 | +BUILTIN(__builtin_aie2p_veqz16, "UiV32s", "nc") |
| 232 | +BUILTIN(__builtin_aie2p_veqz32, "UiV16i", "nc") |
| 233 | + |
| 234 | +// vge |
| 235 | +BUILTIN(__builtin_aie2p_vge8, "V2iV64cV64ci", "nc") |
| 236 | +BUILTIN(__builtin_aie2p_vge16, "UiV32sV32si", "nc") |
| 237 | +BUILTIN(__builtin_aie2p_vge32, "UiV16iV16ii", "nc") |
| 238 | +BUILTIN(__builtin_aie2p_vgebf16, "UiV32yV32y", "nc") |
| 239 | + |
| 240 | +// vlt |
| 241 | +BUILTIN(__builtin_aie2p_vlt8, "V2iV64cV64ci", "nc") |
| 242 | +BUILTIN(__builtin_aie2p_vlt16, "UiV32sV32si", "nc") |
| 243 | +BUILTIN(__builtin_aie2p_vlt32, "UiV16iV16ii", "nc") |
| 244 | +BUILTIN(__builtin_aie2p_vltbf16, "UiV32yV32y", "nc") |
| 245 | + |
| 246 | +// vmaxdiff_lt |
| 247 | +BUILTIN(__builtin_aie2p_vmaxdiff_lt8, "V64cV64cV64ciUWi&", "nc") |
| 248 | +BUILTIN(__builtin_aie2p_vmaxdiff_lt16, "V32sV32sV32siUi&", "nc") |
| 249 | +BUILTIN(__builtin_aie2p_vmaxdiff_lt32, "V16iV16iV16iiUi&", "nc") |
| 250 | + |
| 251 | +// vmax_lt |
| 252 | +BUILTIN(__builtin_aie2p_vmax_lt8, "V64cV64cV64ciUWi&", "nc") |
| 253 | +BUILTIN(__builtin_aie2p_vmax_lt16, "V32sV32sV32siUi&", "nc") |
| 254 | +BUILTIN(__builtin_aie2p_vmax_lt32, "V16iV16iV16iiUi&", "nc") |
| 255 | +BUILTIN(__builtin_aie2p_vmax_ltbf16, "V32yV32yV32yUi&", "nc") |
| 256 | + |
| 257 | +// vmin_ge |
| 258 | +BUILTIN(__builtin_aie2p_vmin_ge8, "V64cV64cV64ciUWi&", "nc") |
| 259 | +BUILTIN(__builtin_aie2p_vmin_ge16, "V32sV32sV32siUi&", "nc") |
| 260 | +BUILTIN(__builtin_aie2p_vmin_ge32, "V16iV16iV16iiUi&", "nc") |
| 261 | +BUILTIN(__builtin_aie2p_vmin_gebf16, "V32yV32yV32yUi&", "nc") |
| 262 | + |
| 263 | +// vneg_gtz |
| 264 | +BUILTIN(__builtin_aie2p_vneg_gtz8, "V64cV64cUWi&", "nc") |
| 265 | +BUILTIN(__builtin_aie2p_vneg_gtz16, "V32sV32sUi&", "nc") |
| 266 | +BUILTIN(__builtin_aie2p_vneg_gtz32, "V16iV16iUi&", "nc") |
| 267 | + |
| 268 | +// vsel |
| 269 | +BUILTIN(__builtin_aie2p_vsel8, "V64cV64cV64cV2i", "nc") |
| 270 | +BUILTIN(__builtin_aie2p_vsel16, "V32sV32sV32sUi", "nc") |
| 271 | +BUILTIN(__builtin_aie2p_vsel32, "V16iV16iV16iUi", "nc") |
| 272 | + |
| 273 | +// vsub_ge |
| 274 | +BUILTIN(__builtin_aie2p_vsub_ge8, "V64cV64cV64ciUWi&", "nc") |
| 275 | +BUILTIN(__builtin_aie2p_vsub_ge16, "V32sV32sV32siUi&", "nc") |
| 276 | +BUILTIN(__builtin_aie2p_vsub_ge32, "V16iV16iV16iiUi&", "nc") |
| 277 | + |
| 278 | +// vsub_lt |
| 279 | +BUILTIN(__builtin_aie2p_vsub_lt8, "V64cV64cV64ciUWi&", "nc") |
| 280 | +BUILTIN(__builtin_aie2p_vsub_lt16, "V32sV32sV32siUi&", "nc") |
| 281 | +BUILTIN(__builtin_aie2p_vsub_lt32, "V16iV16iV16iiUi&", "nc") |
| 282 | + |
| 283 | +// vector bfloat16 to vector int |
| 284 | +BUILTIN(__builtin_aie2p_v16bf16_to_v16i32, "V16iV16yi", "nc") |
| 285 | + |
| 286 | +// Read/Write for Tile Memory Map |
| 287 | +BUILTIN(__builtin_aie2p_read_tm, "iv*", "nc") |
| 288 | +BUILTIN(__builtin_aie2p_write_tm, "viv*", "nc") |
| 289 | + |
| 290 | +// fx2flt and flt2fx (fixed <-> float conversion) |
| 291 | +BUILTIN(__builtin_aie2p_fx2flt, "fii", "nc") |
| 292 | +BUILTIN(__builtin_aie2p_flt2fx, "ifi", "nc") |
| 293 | + |
| 294 | +// non-linear fp ops |
| 295 | +BUILTIN(__builtin_aie2p_sqrtf, "ff", "nc") |
| 296 | +BUILTIN(__builtin_aie2p_inv, "ff", "nc") |
| 297 | +BUILTIN(__builtin_aie2p_invsqrt, "ff", "nc") |
| 298 | +BUILTIN(__builtin_aie2p_exp2, "V16yV16g", "nc") |
| 299 | +BUILTIN(__builtin_aie2p_tanh, "V16yV16g", "nc") |
| 300 | + |
| 301 | +//division/mod |
| 302 | +BUILTIN(__builtin_aie2p_divstep, "vUi&Ui&Ui", "nc") |
0 commit comments